{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "Using BookNLP folder path from config.py -- /Users/sunyambagga/Desktop/txtLAB-2/minimal-narrativity/booknlp-output-narrativity/\n",
      "PLACE LEXICON has 1560 entries.\n",
      "Loading TMV features from: /Users/sunyambagga/Desktop/txtLAB-2/detecting-narrativity/pickles/tmv_features_lite_merged.pickle \n",
      "Size: 17313\n",
      "Top POS-TMV features: ['agenthood', 'vbd', 'nn', 'vbz', 'concreteness', '-rrb-', '-lrb-', 'jj', 'in', 'prp', 'dt', 'eventfulness', 'nns', 'setting', 'temporality', 'vbn', 'agency', 'vbp', 'cc', 'cd', 'feltness', 'wdt', 'coherence', 'nnp', 'md', 'rp', 'pct_quoted', 'temporal_order', 'to', 'vb', 'saying', 'rb', \"'\", 'pos', 'vbg', 'wp', '``', 'wrb', 'nnps', 'jjs', 'ex', 'jjr', 'rbr', 'rbs', 'sym', 'ls', 'pdt', 'fw', 'uh']\n",
      "\n",
      "----\n",
      "Using the two BookNLP paths: /Users/sunyambagga/Desktop/txtLAB-2/minimal-narrativity/booknlp-output-narrativity/ \n",
      " /Users/sunyambagga/Desktop/txtLAB-2/detecting-narrativity/booknlp-output-poetry/ \n",
      "----\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, cohen_kappa_score\n",
    "from scipy.stats.stats import spearmanr # pearsonr\n",
    "\n",
    "import sys\n",
    "sys.path.append('./classifier/')\n",
    "import data_loader\n",
    "import vectorizer\n",
    "import best_model\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import re\n",
    "import pickle\n",
    "import os\n",
    "import random\n",
    "# Single seed (42) shared by every experiment in this notebook.\n",
    "seed_value = 42\n",
    "random.seed(seed_value)\n",
    "np.random.seed(seed_value)\n",
    "# NOTE(review): PYTHONHASHSEED is read when the interpreter starts, so assigning it\n",
    "# here presumably only reaches child processes, not this running kernel -- confirm.\n",
    "os.environ['PYTHONHASHSEED'] = str(seed_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def metrics(THRESHOLD, map_fname_preds):\n",
    "    \"\"\"Compare model labels with reader scores binarised at THRESHOLD.\n",
    "\n",
    "    For each name in the notebook-level `annotated_fnames`, the reader label is\n",
    "    'POS' when `map_annotated_reader[name]` is strictly above THRESHOLD and 'NEG'\n",
    "    otherwise; the model label is `map_fname_preds[name]`.\n",
    "\n",
    "    Prints the threshold and the rounded scores, then returns (f1, kappa),\n",
    "    both rounded to 4 decimal places.\n",
    "    \"\"\"\n",
    "    reader_labels, model_labels = [], []\n",
    "    for name in annotated_fnames:\n",
    "        reader_labels.append('POS' if map_annotated_reader[name] > THRESHOLD else 'NEG')\n",
    "        model_labels.append(map_fname_preds[name])\n",
    "\n",
    "    # Reader labels are the reference; 'POS' is the positive class.\n",
    "    f1 = round(f1_score(reader_labels, model_labels, pos_label='POS'), 4)\n",
    "    prec = round(precision_score(reader_labels, model_labels, pos_label='POS'), 4)\n",
    "    recall = round(recall_score(reader_labels, model_labels, pos_label='POS'), 4)\n",
    "    kap = round(cohen_kappa_score(reader_labels, model_labels), 4)\n",
    "\n",
    "    print(\"\\nThreshold =\", THRESHOLD)\n",
    "    report = \"F1 = {} | Kappa = {} | Precision = {} | Recall = {} | Total Items = {}\"\n",
    "    print(report.format(f1, kap, prec, recall, len(model_labels)))\n",
    "    return f1, kap\n",
    "    \n",
    "    \n",
    "def process(algo, N):\n",
    "    \"\"\"Fit `algo` on the training passages and score it on the reader-annotated ones.\n",
    "\n",
    "    Relies on notebook-level names: train_fnames, Y, annotated_fnames,\n",
    "    map_annotated_reader, best_model and metrics().\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    algo : classifier exposing fit, predict, predict_proba and classes_.\n",
    "    N : forwarded to best_model.top_n_model (presumably the number of\n",
    "        top-ranked features to keep -- confirm in best_model.py).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    Five strings: the overall Spearman correlation between the predicted\n",
    "    probability of 'POS' and the average reader score, followed by F1 and\n",
    "    kappa at reader-score thresholds 2.5 and 3.\n",
    "    \"\"\"\n",
    "    # Use the same file list for the test matrix and for the zip below, so\n",
    "    # prediction rows and filenames cannot drift apart.\n",
    "    X_train, X_test = best_model.top_n_model(train_fnames, annotated_fnames, N)\n",
    "    print(\"Train files:\", len(train_fnames), \"| Annotated files:\", len(annotated_fnames), X_train.shape, len(Y), \"|\", X_test.shape)\n",
    "\n",
    "    algo.fit(X_train, Y)\n",
    "    pred_probs = algo.predict_proba(X_test)\n",
    "    preds = algo.predict(X_test)\n",
    "\n",
    "    # predict_proba columns follow algo.classes_; look the 'POS' column up\n",
    "    # rather than assuming it is the second one.\n",
    "    pos_col = list(algo.classes_).index('POS')\n",
    "\n",
    "    map_fname_probnarr = {}\n",
    "    map_fname_preds = {}\n",
    "    for fname, probs, pred in zip(annotated_fnames, pred_probs, preds):\n",
    "        prob_narr = probs[pos_col]\n",
    "        # Sanity check: report any label that disagrees with a 0.5 cut-off.\n",
    "        expected = 'POS' if prob_narr > 0.5 else 'NEG'\n",
    "        if pred != expected:\n",
    "            print(pred, prob_narr)\n",
    "        map_fname_probnarr[fname] = prob_narr\n",
    "        map_fname_preds[fname] = pred\n",
    "\n",
    "    df1 = pd.read_csv('/Users/sunyambagga/Desktop/txtLAB-2/detecting-narrativity/data/450passages_poetry_nonfic_science.tsv', delimiter='\\t')\n",
    "    df2 = pd.read_csv('/Users/sunyambagga/Desktop/txtLAB-2/detecting-narrativity/data/novel19c_105passages.tsv', delimiter='\\t')[['GENRE', 'FILENAME', 'TEXT']]\n",
    "    df = pd.concat([df1, df2])\n",
    "    df['Avg_Reader_Score'] = df['FILENAME'].map(map_annotated_reader)\n",
    "    df['Probability_Narrative'] = df['FILENAME'].map(map_fname_probnarr)\n",
    "    # Drop rows with any missing value, e.g. passages without a reader score\n",
    "    # or without a prediction.\n",
    "    df = df.dropna()\n",
    "    assert df.shape[0] == 416, \"expected 416 scored passages, found {}\".format(df.shape[0])\n",
    "\n",
    "    # Rank correlation over all scored passages.\n",
    "    corr, _ = spearmanr(df['Probability_Narrative'].tolist(), df['Avg_Reader_Score'].tolist())\n",
    "\n",
    "    print(\"\\n\\n\\nCorrelation - All Data:\", corr)\n",
    "    # sorted() keeps the per-genre rows in a stable order; iterating a bare set\n",
    "    # of strings can change order from one kernel session to the next.\n",
    "    for genre in sorted(set(df['GENRE'].tolist())):\n",
    "        t = df.loc[df['GENRE']==genre]\n",
    "        print(\"\\t\", genre, \"--\", spearmanr(t['Probability_Narrative'].tolist(), t['Avg_Reader_Score'].tolist()))\n",
    "\n",
    "    # Classification metrics at reader-score thresholds 2.5 and 3.\n",
    "    f12, kap2 = metrics(2.5, map_fname_preds)\n",
    "    f13, kap3 = metrics(3, map_fname_preds)\n",
    "\n",
    "    return str(corr), str(f12), str(kap2), str(f13), str(kap3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Feature pickles; both are created via pickle_features.py.\n",
    "with open('/Users/sunyambagga/Desktop/txtLAB-2/detecting-narrativity/pickles/combined_hand_annotate_tense_mood_voice_features_lite.pickle', 'rb') as hand_annotate_file:\n",
    "    hand_annotate = pickle.load(hand_annotate_file)\n",
    "\n",
    "p_fname = '/Users/sunyambagga/Desktop/txtLAB-2/detecting-narrativity/pickles/tense_mood_voice_features_lite.pickle'\n",
    "with open(p_fname, 'rb') as tmv_file:\n",
    "    TMV_FEATURES = pickle.load(tmv_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# genres initially: 19\n",
      "Post-filtering, # genres: 18\n",
      "(416, 30) (105, 5) (450, 3) | Total reader-annotated files: 971\n",
      "Total Mispredictions: 1090 | From: POS-TMV_13438_predictions.tsv\n",
      "Dataset size: (12348, 5)\n"
     ]
    }
   ],
   "source": [
    "# Training filenames and labels; annotated passages, mispredictions and the\n",
    "# 'OPINION' genre are excluded through the loader flags.\n",
    "train_fnames, Y = data_loader.load_data(discard_genres=['OPINION'], remove_annotated_passages=True, remove_mispreds=True)\n",
    "map_annotated_reader = data_loader.reader_annotated_dict()\n",
    "\n",
    "annotated_fnames = list(map_annotated_reader.keys())\n",
    "\n",
    "# Every annotated passage must have features in at least one of the two pickles.\n",
    "# Plain membership tests are used instead of try/except so that unrelated\n",
    "# errors are not swallowed and the failing filename is reported.\n",
    "for fname in annotated_fnames:\n",
    "    assert fname in hand_annotate or fname in TMV_FEATURES, \"no features found for {}\".format(fname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 25 | TMV: ['setting', 'concreteness', 'agenthood', 'coh_seq', 'eventfulness', 'temporality', 'agency', 'feltness'] | POS: ['nnp', 'vbp', 'vbd', '-lrb-', 'prp', 'wdt', 'cc', 'vbz', 'cd', '-rrb-', 'nns', 'in', 'md', 'nn', 'jj', 'vbn', 'dt']\n",
      "POS Train: (12348, 17) | POS Test: (416, 17) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 25) 12348 | (416, 25)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6355094862413104\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5144905720022144, pvalue=1.0477157425902228e-08)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.49354359899614564, pvalue=1.813606900318398e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5452236504215129, pvalue=3.120260037817516e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3453063215456187, pvalue=0.00030903053293574205)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7419 | Kappa = 0.4676 | Precision = 0.668 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6028 | Kappa = 0.3675 | Precision = 0.444 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 26 | TMV: ['setting', 'concreteness', 'agenthood', 'coh_seq', 'eventfulness', 'temporality', 'agency', 'feltness'] | POS: ['nnp', 'vbp', 'vbd', '-lrb-', 'prp', 'jj', 'wdt', 'vbz', 'cd', 'cc', '-rrb-', 'nns', 'in', 'md', 'nn', 'rp', 'vbn', 'dt']\n",
      "POS Train: (12348, 18) | POS Test: (416, 18) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 26) 12348 | (416, 26)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6410084543874386\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5282350559079826, pvalue=3.5657164837758793e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5094973996100531, pvalue=6.175395676431167e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5518105595965918, pvalue=1.8400850256858273e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.34241701681838543, pvalue=0.0003496266130832951)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7419 | Kappa = 0.4676 | Precision = 0.668 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6085 | Kappa = 0.3764 | Precision = 0.4481 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 27 | TMV: ['pct_quoted', 'setting', 'concreteness', 'agenthood', 'coh_seq', 'eventfulness', 'temporality', 'agency', 'feltness'] | POS: ['nnp', 'vbp', 'vbd', '-lrb-', 'prp', 'jj', 'wdt', 'vbz', 'cd', 'cc', '-rrb-', 'nns', 'in', 'md', 'nn', 'rp', 'vbn', 'dt']\n",
      "POS Train: (12348, 18) | POS Test: (416, 18) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 27) 12348 | (416, 27)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6406481691504936\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5444433790953991, pvalue=9.388573752457907e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.49617023582345754, pvalue=1.5245476981625318e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5495206002147758, pvalue=2.213791063357699e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3331841894479962, pvalue=0.000514595235811617)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7454 | Kappa = 0.4768 | Precision = 0.6736 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6119 | Kappa = 0.3829 | Precision = 0.4519 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 28 | TMV: ['pct_quoted', 'setting', 'concreteness', 'temporal_order', 'agenthood', 'coh_seq', 'eventfulness', 'temporality', 'agency', 'feltness'] | POS: ['nnp', 'vbp', 'vbd', '-lrb-', 'prp', 'jj', 'wdt', 'vbz', 'cd', 'cc', '-rrb-', 'nns', 'in', 'md', 'nn', 'rp', 'vbn', 'dt']\n",
      "POS Train: (12348, 18) | POS Test: (416, 18) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 28) 12348 | (416, 28)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6323587046354058\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5049243227601946, pvalue=2.1577883550550623e-08)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4924085398554793, pvalue=1.9540345367227731e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5472272959721697, pvalue=2.6604133142091176e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.32514212185360336, pvalue=0.0007136582269665209)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7385 | Kappa = 0.4585 | Precision = 0.6626 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.605 | Kappa = 0.37 | Precision = 0.4444 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 29 | TMV: ['pct_quoted', 'setting', 'concreteness', 'temporal_order', 'agenthood', 'coh_seq', 'eventfulness', 'temporality', 'agency', 'feltness'] | POS: ['wdt', 'vbz', 'jj', 'rp', 'vbn', 'nnp', 'vbd', 'prp', 'cd', 'nns', 'to', 'cc', '-rrb-', '-lrb-', 'dt', 'vbp', 'in', 'nn', 'md']\n",
      "POS Train: (12348, 19) | POS Test: (416, 19) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 29) 12348 | (416, 29)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6355010663016364\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5175671036445708, pvalue=8.265414507493713e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4981575332728461, pvalue=1.335583800960534e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5492088176965161, pvalue=2.2699838047736825e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3383679869017569, pvalue=0.00041481927531092475)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7431 | Kappa = 0.468 | Precision = 0.6667 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6106 | Kappa = 0.379 | Precision = 0.4486 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 30 | TMV: ['pct_quoted', 'setting', 'concreteness', 'temporal_order', 'agenthood', 'coh_seq', 'eventfulness', 'temporality', 'agency', 'feltness'] | POS: ['vb', 'wdt', 'vbz', 'rp', 'jj', 'vbn', 'nnp', 'vbd', 'prp', 'cd', 'nns', 'to', 'cc', '-rrb-', '-lrb-', 'dt', 'vbp', 'in', 'nn', 'md']\n",
      "POS Train: (12348, 20) | POS Test: (416, 20) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 30) 12348 | (416, 30)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6372546065559551\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5070859846441462, pvalue=1.836324585917056e-08)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5004805967535103, pvalue=1.1429514259336993e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5470268934168857, pvalue=2.7033052697629955e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.33074903652921944, pvalue=0.0005686942159609325)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7402 | Kappa = 0.4631 | Precision = 0.6653 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6124 | Kappa = 0.3822 | Precision = 0.4504 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 31 | TMV: ['pct_quoted', 'setting', 'concreteness', 'temporal_order', 'agenthood', 'coh_seq', 'eventfulness', 'saying', 'temporality', 'agency', 'feltness'] | POS: ['vb', 'wdt', 'vbz', 'rp', 'jj', 'vbn', 'nnp', 'vbd', 'prp', 'cd', 'nns', 'to', 'cc', '-rrb-', '-lrb-', 'dt', 'vbp', 'in', 'nn', 'md']\n",
      "POS Train: (12348, 20) | POS Test: (416, 20) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 31) 12348 | (416, 31)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6327759914670036\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.4946785578706323, pvalue=4.566134244842648e-08)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5077533148739434, pvalue=6.965998583941843e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5401426144708469, pvalue=4.653260180284899e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.33464267481010945, pvalue=0.00048449971385285486)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7397 | Kappa = 0.4589 | Precision = 0.6612 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6017 | Kappa = 0.3637 | Precision = 0.4408 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 32 | TMV: ['pct_quoted', 'setting', 'concreteness', 'temporal_order', 'agenthood', 'coh_seq', 'eventfulness', 'saying', 'temporality', 'agency', 'feltness'] | POS: ['vb', 'wdt', 'vbz', 'rp', 'jj', 'vbn', 'nnp', 'vbd', 'prp', 'rb', 'cd', 'nns', 'to', 'cc', '-rrb-', '-lrb-', 'dt', 'vbp', 'in', 'nn', 'md']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 21) | POS Test: (416, 21) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 32) 12348 | (416, 32)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6289871660449226\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5046076613488313, pvalue=2.2091798329351697e-08)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4951618165593643, pvalue=1.6299118051642458e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5307115419302809, pvalue=9.602708551039955e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.29134548229472984, pvalue=0.0025666837003695234)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7397 | Kappa = 0.4589 | Precision = 0.6612 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6072 | Kappa = 0.3726 | Precision = 0.4449 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 33 | TMV: ['pct_quoted', 'setting', 'concreteness', 'temporal_order', 'agenthood', 'coh_seq', 'eventfulness', 'saying', 'temporality', 'agency', 'feltness'] | POS: ['vb', 'wdt', 'vbz', 'rp', 'jj', 'vbn', 'nnp', 'vbd', 'prp', 'rb', 'cd', 'nns', 'to', 'cc', '-rrb-', \"'\", '-lrb-', 'dt', 'vbp', 'in', 'nn', 'md']\n",
      "POS Train: (12348, 21) | POS Test: (416, 21) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 32) 12348 | (416, 32)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6289871660449226\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5046076613488313, pvalue=2.2091798329351697e-08)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4951618165593643, pvalue=1.6299118051642458e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5307115419302809, pvalue=9.602708551039955e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.29134548229472984, pvalue=0.0025666837003695234)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7397 | Kappa = 0.4589 | Precision = 0.6612 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6072 | Kappa = 0.3726 | Precision = 0.4449 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 34 | TMV: ['pct_quoted', 'setting', 'concreteness', 'temporal_order', 'agenthood', 'coh_seq', 'eventfulness', 'saying', 'temporality', 'agency', 'feltness'] | POS: ['vb', 'wdt', 'vbz', 'rp', 'jj', 'vbn', 'nnp', 'vbd', 'prp', 'rb', 'cd', 'nns', 'to', 'pos', 'cc', '-rrb-', \"'\", '-lrb-', 'dt', 'vbp', 'in', 'nn', 'md']\n",
      "POS Train: (12348, 22) | POS Test: (416, 22) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 33) 12348 | (416, 33)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6356257007699252\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5099065242661371, pvalue=1.4852558486508552e-08)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5001186566050899, pvalue=1.1711157681266848e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5520710059542299, pvalue=1.8016349446957807e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3239658175794886, pvalue=0.0007480694522995959)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7385 | Kappa = 0.4585 | Precision = 0.6626 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.605 | Kappa = 0.37 | Precision = 0.4444 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 35 | TMV: ['pct_quoted', 'setting', 'concreteness', 'temporal_order', 'agenthood', 'coh_seq', 'eventfulness', 'saying', 'temporality', 'agency', 'feltness'] | POS: ['vb', 'wdt', 'vbz', 'rp', 'jj', 'vbn', 'nnp', 'vbd', 'prp', 'rb', 'cd', 'vbg', 'nns', 'to', 'pos', 'cc', '-rrb-', \"'\", 'dt', '-lrb-', 'vbp', 'in', 'nn', 'md']\n",
      "POS Train: (12348, 23) | POS Test: (416, 23) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 34) 12348 | (416, 34)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6295356832814881\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.49251343344299864, pvalue=5.333174625842892e-08)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4906224514945588, pvalue=2.1961504089800849e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5435337922781345, pvalue=3.56645113849042e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3054037473411692, pvalue=0.001534672953979677)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7385 | Kappa = 0.4585 | Precision = 0.6626 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.605 | Kappa = 0.37 | Precision = 0.4444 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 36 | TMV: ['pct_quoted', 'setting', 'concreteness', 'temporal_order', 'agenthood', 'coh_seq', 'eventfulness', 'saying', 'temporality', 'agency', 'feltness'] | POS: ['vb', 'wdt', 'vbz', 'rp', 'jj', 'vbn', 'nnp', 'vbd', 'prp', 'rb', 'wp', 'cd', 'vbg', 'nns', 'to', 'pos', 'cc', '-rrb-', \"'\", 'dt', '-lrb-', 'vbp', 'in', 'nn', 'md']\n",
      "POS Train: (12348, 24) | POS Test: (416, 24) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 35) 12348 | (416, 35)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6305963973832924\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.507688923883392, pvalue=1.7551785643809628e-08)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.48588928013251304, pvalue=2.983398188932968e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5461011893776392, pvalue=2.9101871194219727e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3148978669643174, pvalue=0.0010687594234373076)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.738 | Kappa = 0.4543 | Precision = 0.6585 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6056 | Kappa = 0.3694 | Precision = 0.4431 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 37 | TMV: ['pct_quoted', 'setting', 'concreteness', 'temporal_order', 'agenthood', 'coh_seq', 'eventfulness', 'saying', 'temporality', 'agency', 'feltness'] | POS: ['vb', 'wdt', 'vbz', 'rp', 'jj', 'vbn', 'nnp', 'vbd', 'prp', 'rb', 'wp', 'cd', 'vbg', 'nns', 'to', 'pos', '``', 'cc', '-rrb-', \"'\", 'dt', '-lrb-', 'vbp', 'in', 'nn', 'md']\n",
      "POS Train: (12348, 25) | POS Test: (416, 25) | POS feature-columns: ['``', 'cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 36) 12348 | (416, 36)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.635207323058438\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.49205872354315633, pvalue=5.509211683300413e-08)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.49700158065241407, pvalue=1.4425935732963766e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5500502595330848, pvalue=2.1213755774913714e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.33536531179381096, pvalue=0.00047019536642474945)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7397 | Kappa = 0.4589 | Precision = 0.6612 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6017 | Kappa = 0.3637 | Precision = 0.4408 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 38 | TMV: ['pct_quoted', 'setting', 'concreteness', 'temporal_order', 'agenthood', 'coh_seq', 'eventfulness', 'saying', 'temporality', 'agency', 'feltness'] | POS: ['vb', 'wdt', 'vbz', 'rp', 'jj', 'vbn', 'nnp', 'vbd', 'prp', 'rb', 'wp', 'cd', 'vbg', 'nns', 'to', 'pos', '``', 'cc', '-rrb-', \"'\", 'wrb', 'dt', '-lrb-', 'vbp', 'in', 'nn', 'md']\n",
      "POS Train: (12348, 26) | POS Test: (416, 26) | POS feature-columns: ['``', 'cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 37) 12348 | (416, 37)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6361889487303735\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5178676613709019, pvalue=8.075126713756978e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5022579176441365, pvalue=1.0137609712709483e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5484896694953956, pvalue=2.4048604642168794e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.31385329948765867, pvalue=0.0011128102113443113)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7385 | Kappa = 0.4585 | Precision = 0.6626 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.605 | Kappa = 0.37 | Precision = 0.4444 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 39 | TMV: ['pct_quoted', 'setting', 'concreteness', 'temporal_order', 'agenthood', 'coh_seq', 'eventfulness', 'saying', 'temporality', 'agency', 'feltness'] | POS: ['vb', 'wdt', 'nnps', 'vbz', 'rp', 'jj', 'vbn', 'nnp', 'vbd', 'prp', 'rb', 'wp', 'cd', 'vbg', 'nns', 'to', 'pos', '``', 'cc', '-rrb-', \"'\", 'wrb', 'dt', '-lrb-', 'vbp', 'in', 'nn', 'md']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 27) | POS Test: (416, 27) | POS feature-columns: ['``', 'cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 38) 12348 | (416, 38)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6292083931721005\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5145217863164596, pvalue=1.0452105226728726e-08)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4883608442012776, pvalue=2.543815536856336e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5478862929411824, pvalue=2.523922143717912e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3118747696690519, pvalue=0.001200809109903508)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7373 | Kappa = 0.4581 | Precision = 0.6639 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6028 | Kappa = 0.3675 | Precision = 0.444 | Recall = 0.9386 | Total Items = 416\n"
     ]
    }
   ],
   "source": [
    "# Random forest with the best pos-TMV parameters; the same estimator object is refit for every N.\n",
    "algo = RandomForestClassifier(n_estimators=500, max_depth=None, random_state=seed_value)\n",
    "results_path = '/Users/sunyambagga/Desktop/txtLAB-2/detecting-narrativity/results/PctQuoted_WithQuotation_topN_reader_annotated_rf.tsv'\n",
    "header = \"Top-N\\tSpearman-Correlation\\tF1-2.5\\tKappa-2.5\\tF1-3\\tKappa-3\\n\"\n",
    "\n",
    "# One TSV row per N in 25..39.\n",
    "# NOTE(review): the saved output for N=33 shows the same 32-column matrices and\n",
    "# scores as N=32 -- check how best_model.top_n_model handles that extra feature.\n",
    "with open(results_path, 'w') as F:\n",
    "    F.write(header)\n",
    "    for n in range(25,40):\n",
    "        print(\"\\n###########################################\\n\")\n",
    "        corr, f12, kap2, f13, kap3 = process(algo, n)\n",
    "        F.write('\\t'.join([str(n), corr, f12, kap2, f13, kap3]) + '\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 1 | TMV: ['agenthood'] | POS: []\n",
      "Train files: 12348 | Annotated files: 416 (12348, 1) 12348 | (416, 1)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.45132478848495694\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.4819275990047986, pvalue=2.9217483120943107e-07)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.14036696331255205, pvalue=0.16364133979008577)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.286731442766798, pvalue=0.0025045604414793998)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.03474379538115243, pvalue=0.7249413679251852)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6573 | Kappa = 0.3001 | Precision = 0.5975 | Recall = 0.7306 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5143 | Kappa = 0.2296 | Precision = 0.3814 | Recall = 0.7895 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 2 | TMV: ['agenthood'] | POS: ['vbd']\n",
      "POS Train: (12348, 1) | POS Test: (416, 1) | POS feature-columns: ['vbd']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 2) 12348 | (416, 2)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.5672853159834677\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5154243794803465, pvalue=2.967602039519219e-08)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.3498872641692305, pvalue=0.00035921220519631255)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.4075232529370036, pvalue=1.0920342803127963e-05)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.26461379219423764, pvalue=0.0063752365915842515)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7224 | Kappa = 0.4579 | Precision = 0.6869 | Recall = 0.7617 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5854 | Kappa = 0.3546 | Precision = 0.4486 | Recall = 0.8421 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 3 | TMV: ['agenthood'] | POS: ['nn', 'vbd']\n",
      "POS Train: (12348, 2) | POS Test: (416, 2) | POS feature-columns: ['nn', 'vbd']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 3) 12348 | (416, 3)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.5924932994627192\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5422113244399479, pvalue=3.957662526762997e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4181195453776211, pvalue=1.500238007969895e-05)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.563582062287436, pvalue=1.7674211019850558e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.2327479646768043, pvalue=0.016879629557900247)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7351 | Kappa = 0.4697 | Precision = 0.6814 | Recall = 0.7979 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5941 | Kappa = 0.3615 | Precision = 0.4469 | Recall = 0.886 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 4 | TMV: ['concreteness', 'agenthood'] | POS: ['nn', 'vbd']\n",
      "POS Train: (12348, 2) | POS Test: (416, 2) | POS feature-columns: ['nn', 'vbd']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 4) 12348 | (416, 4)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6586692232474668\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5943986652088797, pvalue=4.509347110109924e-11)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5552238966469635, pvalue=2.0336413517283724e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6110904927122618, pvalue=1.7040888946347513e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3338342722711799, pvalue=0.0005009749410215876)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7657 | Kappa = 0.5194 | Precision = 0.6933 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6136 | Kappa = 0.3862 | Precision = 0.4538 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 5 | TMV: ['concreteness', 'agenthood'] | POS: ['nn', 'vbz', 'vbd']\n",
      "POS Train: (12348, 3) | POS Test: (416, 3) | POS feature-columns: ['nn', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 5) 12348 | (416, 5)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.659813274723801\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.6355771873334887, pvalue=7.168722489361438e-13)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.501964726847106, pvalue=1.0340682366301998e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6129914175975414, pvalue=1.3923788630377626e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3467312360032583, pvalue=0.0002906535607362562)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7692 | Kappa = 0.5286 | Precision = 0.6992 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6229 | Kappa = 0.4018 | Precision = 0.4619 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 6 | TMV: ['concreteness', 'agenthood'] | POS: ['nn', '-lrb-', 'vbz', 'vbd']\n",
      "POS Train: (12348, 4) | POS Test: (416, 4) | POS feature-columns: ['lrb', 'nn', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 6) 12348 | (416, 6)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6438674299438792\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5979141505433893, pvalue=3.2395131020919384e-11)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5057002223042502, pvalue=8.020439639246423e-08)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5536198410049137, pvalue=4.271499528959364e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.31917012041882037, pvalue=0.0009046489455474086)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.75 | Kappa = 0.4782 | Precision = 0.668 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5983 | Kappa = 0.3573 | Precision = 0.4372 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 7 | TMV: ['concreteness', 'agenthood'] | POS: ['nn', '-lrb-', 'vbz', 'vbd', '-rrb-']\n",
      "POS Train: (12348, 5) | POS Test: (416, 5) | POS feature-columns: ['lrb', 'nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 7) 12348 | (416, 7)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6483795718233653\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5998584911737268, pvalue=2.6933212400274437e-11)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5348125395544236, pvalue=9.936966228989192e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5247198164178701, pvalue=4.719044305646e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.35122332305795784, pvalue=0.0002391156887086647)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7506 | Kappa = 0.4824 | Precision = 0.6721 | Recall = 0.8497 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6034 | Kappa = 0.3668 | Precision = 0.4426 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 8 | TMV: ['concreteness', 'agenthood'] | POS: ['nn', '-lrb-', 'vbz', 'in', 'vbd', '-rrb-']\n",
      "POS Train: (12348, 6) | POS Test: (416, 6) | POS feature-columns: ['in', 'lrb', 'nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 8) 12348 | (416, 8)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6346705498938419\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5628862755754257, pvalue=7.375246738502388e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.48816587326532246, pvalue=2.5761213108302886e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.51115091917615, pvalue=1.3516988943695008e-08)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.35798836721630506, pvalue=0.00017723454123239404)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7455 | Kappa = 0.4687 | Precision = 0.664 | Recall = 0.8497 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6039 | Kappa = 0.3662 | Precision = 0.4413 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 9 | TMV: ['concreteness', 'agenthood'] | POS: ['nn', '-lrb-', 'vbz', 'in', 'vbd', 'jj', '-rrb-']\n",
      "POS Train: (12348, 7) | POS Test: (416, 7) | POS feature-columns: ['in', 'jj', 'lrb', 'nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 9) 12348 | (416, 9)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.63479441647593\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5597937021294708, pvalue=9.552422843127594e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4988600555760212, pvalue=1.2742887652835808e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5080546574501796, pvalue=1.7076412641033654e-08)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.32221006643268324, pvalue=0.0008022623538740743)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7364 | Kappa = 0.4497 | Precision = 0.6559 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5928 | Kappa = 0.3485 | Precision = 0.4332 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 10 | TMV: ['concreteness', 'agenthood'] | POS: ['nn', '-lrb-', 'vbz', 'in', 'prp', 'vbd', 'jj', '-rrb-']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 8) | POS Test: (416, 8) | POS feature-columns: ['in', 'jj', 'lrb', 'nn', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 10) 12348 | (416, 10)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6362908815811394\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5781887723199248, pvalue=1.9702963049273205e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.49124566221566884, pvalue=2.108598672554435e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5541749851230708, pvalue=4.0696360997903005e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.2599541878443464, pvalue=0.007405053137574425)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7438 | Kappa = 0.4642 | Precision = 0.6613 | Recall = 0.8497 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5967 | Kappa = 0.3542 | Precision = 0.4355 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 11 | TMV: ['concreteness', 'agenthood', 'eventfulness'] | POS: ['nn', '-lrb-', 'vbz', 'in', 'prp', 'vbd', 'jj', '-rrb-']\n",
      "POS Train: (12348, 8) | POS Test: (416, 8) | POS feature-columns: ['in', 'jj', 'lrb', 'nn', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 11) 12348 | (416, 11)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6482134328091858\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5798117679200361, pvalue=1.7060299659117451e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5051004203853668, pvalue=8.356145828069243e-08)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5524176249737313, pvalue=4.74219626820681e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3380056970466788, pvalue=0.000421166701439285)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7397 | Kappa = 0.4589 | Precision = 0.6612 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5961 | Kappa = 0.3548 | Precision = 0.4367 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 12 | TMV: ['concreteness', 'agenthood', 'eventfulness'] | POS: ['dt', 'nn', '-lrb-', 'vbz', 'in', 'prp', 'vbd', 'jj', '-rrb-']\n",
      "POS Train: (12348, 9) | POS Test: (416, 9) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 12) 12348 | (416, 12)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.634948726755436\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.571851217816471, pvalue=3.431718884729748e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5012057151259927, pvalue=1.08845555770563e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5506403435447629, pvalue=5.530516161331169e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.26376492147031766, pvalue=0.006552800251237923)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7465 | Kappa = 0.4771 | Precision = 0.6722 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6028 | Kappa = 0.3675 | Precision = 0.444 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 13 | TMV: ['concreteness', 'agenthood', 'eventfulness'] | POS: ['dt', 'nn', '-lrb-', 'vbz', 'in', 'prp', 'vbd', 'jj', 'nns', '-rrb-']\n",
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 13) 12348 | (416, 13)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6268597989162042\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5591528676393078, pvalue=1.0075065484284163e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4973182624200117, pvalue=1.4124924330419076e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5316808313817057, pvalue=2.700778586521869e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.2731726675098785, pvalue=0.004809614809002111)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.738 | Kappa = 0.4543 | Precision = 0.6585 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5944 | Kappa = 0.3516 | Precision = 0.435 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 14 | TMV: ['setting', 'concreteness', 'agenthood', 'eventfulness'] | POS: ['dt', 'nn', '-lrb-', 'vbz', 'in', 'prp', 'vbd', 'jj', 'nns', '-rrb-']\n",
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 14) 12348 | (416, 14)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6403404916578438\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5695942531560264, pvalue=4.169657538491392e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4903403098292376, pvalue=2.2369142567074633e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5644711884350826, pvalue=1.6312295948712186e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3069732725171362, pvalue=0.0014467484767559464)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7443 | Kappa = 0.4684 | Precision = 0.6653 | Recall = 0.8446 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6017 | Kappa = 0.3637 | Precision = 0.4408 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 15 | TMV: ['concreteness', 'setting', 'agency', 'agenthood', 'eventfulness'] | POS: ['dt', 'nn', '-lrb-', 'vbz', 'in', 'prp', 'vbd', 'jj', 'nns', '-rrb-']\n",
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 15) 12348 | (416, 15)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6328625575996555\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5424435530989249, pvalue=3.886111308898027e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.47913941708435426, pvalue=4.581636340260288e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5314535311174531, pvalue=2.7509954209349932e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.33821740926721305, pvalue=0.00041744667347486255)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7306 | Kappa = 0.4399 | Precision = 0.6531 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6017 | Kappa = 0.3637 | Precision = 0.4408 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 16 | TMV: ['concreteness', 'setting', 'agency', 'agenthood', 'eventfulness'] | POS: ['dt', 'nn', '-lrb-', 'vbz', 'in', 'prp', 'vbn', 'vbd', 'jj', 'nns', '-rrb-']\n",
      "POS Train: (12348, 11) | POS Test: (416, 11) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 16) 12348 | (416, 16)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.632951702561362\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5545355315837623, pvalue=1.473983592055501e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4691659964953039, pvalue=8.493862107993024e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5364680532737187, pvalue=1.8264064052400845e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3369363174025788, pvalue=0.0004404273985704462)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7414 | Kappa = 0.4634 | Precision = 0.6639 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6034 | Kappa = 0.3668 | Precision = 0.4426 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 17 | TMV: ['concreteness', 'setting', 'agency', 'agenthood', 'eventfulness'] | POS: ['vbp', 'dt', 'nn', '-lrb-', 'vbz', 'in', 'prp', 'vbn', 'vbd', 'jj', 'nns', '-rrb-']\n",
      "POS Train: (12348, 12) | POS Test: (416, 12) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 17) 12348 | (416, 17)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6379921694617067\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5475657789577273, pvalue=2.5894460669401353e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4906913026618243, pvalue=2.186310585360272e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5376807096555625, pvalue=1.652492115738014e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3314167896009721, pvalue=0.0005533625508242603)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7465 | Kappa = 0.4771 | Precision = 0.6722 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6085 | Kappa = 0.3764 | Precision = 0.4481 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 18 | TMV: ['concreteness', 'setting', 'agency', 'agenthood', 'eventfulness', 'temporality'] | POS: ['vbp', 'dt', 'nn', '-lrb-', 'vbz', 'in', 'prp', 'vbn', 'vbd', 'jj', 'nns', '-rrb-']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 12) | POS Test: (416, 12) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 18) 12348 | (416, 18)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6434099454270993\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5704030794132344, pvalue=3.8891965400878947e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.49319609431635647, pvalue=1.855545872575316e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5329600503634191, pvalue=2.434167232825705e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.34674773786399227, pvalue=0.0002904467740444277)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7448 | Kappa = 0.4726 | Precision = 0.6694 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6067 | Kappa = 0.3732 | Precision = 0.4463 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 19 | TMV: ['concreteness', 'setting', 'agency', 'agenthood', 'eventfulness', 'temporality'] | POS: ['vbp', 'dt', 'prp', '-lrb-', 'vbz', 'in', 'nn', 'vbn', 'vbd', 'jj', 'nns', 'cd', '-rrb-']\n",
      "POS Train: (12348, 13) | POS Test: (416, 13) | POS feature-columns: ['cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 19) 12348 | (416, 19)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.641904574516912\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5750727173427046, pvalue=2.5921418292185937e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.47751112936466616, pvalue=5.074228253389872e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.53431055055366, pvalue=2.1801790744252463e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.34026723998112934, pvalue=0.00038296046531682506)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7436 | Kappa = 0.4722 | Precision = 0.6708 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6102 | Kappa = 0.3797 | Precision = 0.45 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 20 | TMV: ['concreteness', 'feltness', 'setting', 'agency', 'agenthood', 'eventfulness', 'temporality'] | POS: ['vbp', 'dt', 'prp', '-lrb-', 'vbz', 'in', 'nn', 'vbn', 'vbd', 'jj', 'nns', 'cd', '-rrb-']\n",
      "POS Train: (12348, 13) | POS Test: (416, 13) | POS feature-columns: ['cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 20) 12348 | (416, 20)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6409385525693496\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5736235724980892, pvalue=2.941952635976855e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4876687856511009, pvalue=2.660260909688991e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5398070888732486, pvalue=1.3852221839369743e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.31955102262566176, pvalue=0.0008911967976394091)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7419 | Kappa = 0.4676 | Precision = 0.668 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6085 | Kappa = 0.3764 | Precision = 0.4481 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 21 | TMV: ['concreteness', 'feltness', 'setting', 'agency', 'agenthood', 'eventfulness', 'temporality'] | POS: ['vbp', 'dt', 'prp', '-lrb-', 'vbz', 'in', 'nn', 'cc', 'vbn', 'vbd', 'jj', 'nns', 'cd', '-rrb-']\n",
      "POS Train: (12348, 14) | POS Test: (416, 14) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 21) 12348 | (416, 21)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.643851559313399\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5754006617378554, pvalue=2.5187215956039056e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.49539994580317215, pvalue=1.6044233466688934e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5408196032098083, pvalue=1.273056859235721e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3191939355731009, pvalue=0.0009038024732721935)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7465 | Kappa = 0.4771 | Precision = 0.6722 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6085 | Kappa = 0.3764 | Precision = 0.4481 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 22 | TMV: ['concreteness', 'feltness', 'setting', 'agency', 'agenthood', 'eventfulness', 'temporality'] | POS: ['vbp', 'dt', 'prp', '-lrb-', 'vbz', 'in', 'nn', 'cc', 'vbn', 'vbd', 'nnp', 'jj', 'nns', 'cd', '-rrb-']\n",
      "POS Train: (12348, 15) | POS Test: (416, 15) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 22) 12348 | (416, 22)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6402036760089534\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.561304373225727, pvalue=8.421372173842245e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4948511226754049, pvalue=1.6637471827824025e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5368582813876394, pvalue=1.7686079186078765e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.32172025319120706, pvalue=0.0008180086063433996)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7436 | Kappa = 0.4722 | Precision = 0.6708 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6045 | Kappa = 0.3707 | Precision = 0.4458 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 23 | TMV: ['concreteness', 'feltness', 'setting', 'agency', 'agenthood', 'eventfulness', 'temporality'] | POS: ['vbp', 'dt', 'prp', '-lrb-', 'vbz', 'in', 'nn', 'cc', 'wdt', 'vbn', 'vbd', 'nnp', 'jj', 'nns', 'cd', '-rrb-']\n",
      "POS Train: (12348, 16) | POS Test: (416, 16) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 23) 12348 | (416, 23)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.63503950289216\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5580073956715067, pvalue=1.1078415076012497e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.48656606718197554, pvalue=2.856335798363378e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5176100513366787, pvalue=8.237962891205329e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.30788460795373623, pvalue=0.0013978200345181708)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7407 | Kappa = 0.4673 | Precision = 0.6695 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6119 | Kappa = 0.3829 | Precision = 0.4519 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 24 | TMV: ['concreteness', 'coh_seq', 'feltness', 'setting', 'agency', 'agenthood', 'eventfulness', 'temporality'] | POS: ['vbp', 'dt', 'prp', '-lrb-', 'vbz', 'in', 'nn', 'cc', 'wdt', 'vbn', 'vbd', 'nnp', 'jj', 'nns', 'cd', '-rrb-']\n",
      "POS Train: (12348, 16) | POS Test: (416, 16) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 24) 12348 | (416, 24)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.640113041410432\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5519188841221727, pvalue=1.8239982468647563e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5102422349648735, pvalue=5.864515618522993e-08)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5215354282283635, pvalue=6.06619067121933e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3369057484231867, pvalue=0.0004409897029266038)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7419 | Kappa = 0.4676 | Precision = 0.668 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6085 | Kappa = 0.3764 | Precision = 0.4481 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 25 | TMV: ['concreteness', 'coh_seq', 'feltness', 'setting', 'agency', 'agenthood', 'eventfulness', 'temporality'] | POS: ['vbp', 'dt', 'prp', '-lrb-', 'vbz', 'to', 'in', 'nn', 'cc', 'wdt', 'vbn', 'vbd', 'nnp', 'jj', 'nns', 'cd', '-rrb-']\n",
      "POS Train: (12348, 17) | POS Test: (416, 17) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 25) 12348 | (416, 25)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6304166042222416\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5518477440246562, pvalue=1.8345476552075405e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5039285180669664, pvalue=9.051088105246625e-08)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5142272912822481, pvalue=1.0690764093318502e-08)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3016056489029505, pvalue=0.0017678513948395764)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7419 | Kappa = 0.4676 | Precision = 0.668 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6085 | Kappa = 0.3764 | Precision = 0.4481 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 26 | TMV: ['concreteness', 'coh_seq', 'feltness', 'setting', 'agency', 'agenthood', 'eventfulness', 'temporality'] | POS: ['vbp', 'dt', 'prp', '-lrb-', 'vbz', 'to', 'in', 'nn', 'cc', 'wdt', 'vbn', 'vbd', 'md', 'nnp', 'jj', 'nns', 'cd', '-rrb-']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 18) | POS Test: (416, 18) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 26) 12348 | (416, 26)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.639833019349403\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5507170000959919, pvalue=2.0102944081902604e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5078589344468009, pvalue=6.915496132957373e-08)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5246279990147178, pvalue=4.753510696901856e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3254024220666999, pvalue=0.0007062420362095805)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7436 | Kappa = 0.4722 | Precision = 0.6708 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6102 | Kappa = 0.3797 | Precision = 0.45 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 27 | TMV: ['concreteness', 'coh_seq', 'feltness', 'setting', 'agency', 'agenthood', 'eventfulness', 'temporality'] | POS: ['prp', '-lrb-', 'vbz', 'to', 'vbn', 'in', 'vbd', 'jj', 'rp', 'dt', 'nn', 'cc', 'wdt', 'md', 'cd', 'nnp', '-rrb-', 'vbp', 'nns']\n",
      "POS Train: (12348, 19) | POS Test: (416, 19) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 27) 12348 | (416, 27)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6383259475045736\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5560562859278049, pvalue=1.3012070403502253e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5043701664154879, pvalue=8.782958468159247e-08)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5140927678827876, pvalue=1.0801510664031619e-08)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3124654218535442, pvalue=0.0011738975854025896)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7442 | Kappa = 0.4764 | Precision = 0.6751 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6097 | Kappa = 0.3804 | Precision = 0.4515 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 28 | TMV: ['concreteness', 'coh_seq', 'feltness', 'setting', 'agency', 'agenthood', 'eventfulness', 'temporal_order', 'temporality'] | POS: ['prp', '-lrb-', 'vbz', 'to', 'vbn', 'in', 'vbd', 'jj', 'rp', 'dt', 'nn', 'cc', 'wdt', 'md', 'cd', 'nnp', '-rrb-', 'vbp', 'nns']\n",
      "POS Train: (12348, 19) | POS Test: (416, 19) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 28) 12348 | (416, 28)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6354607745993169\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.543848328849917, pvalue=3.479016358587013e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5049905663550867, pvalue=8.41906424750542e-08)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.49690282377894374, pvalue=3.888495359422256e-08)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.33452467438381356, pvalue=0.0004868731147254148)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7344 | Kappa = 0.4532 | Precision = 0.6625 | Recall = 0.8238 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6045 | Kappa = 0.3707 | Precision = 0.4458 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 29 | TMV: ['concreteness', 'coh_seq', 'feltness', 'setting', 'agency', 'agenthood', 'eventfulness', 'temporal_order', 'temporality'] | POS: ['prp', '-lrb-', 'vbz', 'to', 'vbn', 'vb', 'in', 'vbd', 'jj', 'rp', 'dt', 'nn', 'cc', 'wdt', 'md', 'cd', 'nnp', '-rrb-', 'vbp', 'nns']\n",
      "POS Train: (12348, 20) | POS Test: (416, 20) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 29) 12348 | (416, 29)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6298847299666039\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5458959031498967, pvalue=2.9580797071014768e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4909190878666753, pvalue=2.1540548081215098e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5166962220851847, pvalue=8.841356030258856e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3024862506424115, pvalue=0.0017110993171383796)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.746 | Kappa = 0.4729 | Precision = 0.668 | Recall = 0.8446 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6089 | Kappa = 0.3758 | Precision = 0.4467 | Recall = 0.9561 | Total Items = 416\n"
     ]
    }
   ],
   "source": [
    "# Top-N feature sweep, part 1 (N = 1..29): one TSV row of agreement metrics per N.\n",
    "# `process` is defined earlier in the notebook; judging by the header row below it returns the\n",
    "# Spearman correlation plus F1/Kappa at thresholds 2.5 and 3, already formatted as strings.\n",
    "algo = RandomForestClassifier(\n",
    "    n_estimators=500,\n",
    "    max_depth=None,\n",
    "    random_state=seed_value,\n",
    ")  # the best pos-TMV parameters\n",
    "with open('/Users/sunyambagga/Desktop/txtLAB-2/detecting-narrativity/results/WithQuotation_topN_reader_annotated_rf.tsv', 'w') as F:\n",
    "    F.write(\"Top-N\\tSpearman-Correlation\\tF1-2.5\\tKappa-2.5\\tF1-3\\tKappa-3\\n\")\n",
    "    for n in range(1, 30):\n",
    "        print(\"\\n###########################################\\n\")\n",
    "        corr, f12, kap2, f13, kap3 = process(algo, n)\n",
    "        # Row layout mirrors the header: N first, then the five metric strings, tab-separated.\n",
    "        F.write('\\t'.join([str(n), corr, f12, kap2, f13, kap3]) + '\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 30 | TMV: ['eventfulness', 'setting', 'agency', 'coh_seq', 'feltness', 'temporal_order', 'concreteness', 'saying', 'temporality', 'agenthood'] | POS: ['cd', '-rrb-', 'nns', 'vbd', 'in', 'cc', 'nnp', 'dt', 'to', 'nn', 'prp', 'jj', 'vb', 'vbp', 'md', 'vbz', 'wdt', 'vbn', '-lrb-', 'rp']\n",
      "POS Train: (12348, 20) | POS Test: (416, 20) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 30) 12348 | (416, 30)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.631130130703637\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.2873570674683852, pvalue=0.0029563429818771016)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5004417624955442, pvalue=1.1459421249634228e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.49868287197723987, pvalue=3.416561152953861e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5492011564680321, pvalue=2.2713816665712188e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.738 | Kappa = 0.4543 | Precision = 0.6585 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6056 | Kappa = 0.3694 | Precision = 0.4431 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 31 | TMV: ['eventfulness', 'setting', 'agency', 'coh_seq', 'feltness', 'temporal_order', 'concreteness', 'saying', 'temporality', 'agenthood'] | POS: ['cd', '-rrb-', 'nns', 'vbd', 'in', 'cc', 'nnp', 'dt', 'to', 'nn', 'prp', 'rb', 'jj', 'vb', 'vbp', 'md', 'vbz', 'wdt', 'vbn', '-lrb-', 'rp']\n",
      "POS Train: (12348, 21) | POS Test: (416, 21) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 31) 12348 | (416, 31)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6337891247271196\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.32190634034702187, pvalue=0.0008119933969139815)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5036422719300045, pvalue=9.229018530608005e-08)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5084834662170679, pvalue=1.653474729634961e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5407403920054228, pvalue=4.441061322433286e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7373 | Kappa = 0.4581 | Precision = 0.6639 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6028 | Kappa = 0.3675 | Precision = 0.444 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 32 | TMV: ['eventfulness', 'setting', 'agency', 'coh_seq', 'feltness', 'temporal_order', 'concreteness', 'saying', 'temporality', 'agenthood'] | POS: ['cd', '-rrb-', 'nns', 'vbd', 'in', 'cc', 'nnp', 'dt', 'to', 'nn', 'prp', 'rb', \"'\", 'jj', 'vb', 'vbp', 'md', 'vbz', 'wdt', 'vbn', '-lrb-', 'rp']\n",
      "POS Train: (12348, 21) | POS Test: (416, 21) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 31) 12348 | (416, 31)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6337891247271196\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.32190634034702187, pvalue=0.0008119933969139815)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5036422719300045, pvalue=9.229018530608005e-08)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5084834662170679, pvalue=1.653474729634961e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5407403920054228, pvalue=4.441061322433286e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7373 | Kappa = 0.4581 | Precision = 0.6639 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6028 | Kappa = 0.3675 | Precision = 0.444 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 33 | TMV: ['eventfulness', 'setting', 'agency', 'coh_seq', 'feltness', 'temporal_order', 'concreteness', 'saying', 'temporality', 'agenthood'] | POS: ['cd', '-rrb-', 'nns', 'vbd', 'in', 'cc', 'nnp', 'dt', 'to', 'nn', 'prp', 'rb', \"'\", 'jj', 'pos', 'vb', 'vbp', 'md', 'vbz', 'wdt', 'vbn', '-lrb-', 'rp']\n",
      "POS Train: (12348, 22) | POS Test: (416, 22) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 32) 12348 | (416, 32)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6351201271081471\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.33229780166274775, pvalue=0.0005337147447807523)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.49522209014182234, pvalue=1.6234241607062612e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5162463383700197, pvalue=9.153748848270571e-09)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5530402808449444, pvalue=1.6652004022396948e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7373 | Kappa = 0.4581 | Precision = 0.6639 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6028 | Kappa = 0.3675 | Precision = 0.444 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 34 | TMV: ['eventfulness', 'setting', 'agency', 'coh_seq', 'feltness', 'temporal_order', 'concreteness', 'saying', 'temporality', 'agenthood'] | POS: ['cd', '-rrb-', 'nns', 'vbd', 'in', 'cc', 'nnp', 'dt', 'to', 'nn', 'prp', 'vbg', 'rb', \"'\", 'jj', 'pos', 'vb', 'vbp', 'md', 'vbz', 'wdt', 'vbn', '-lrb-', 'rp']\n",
      "POS Train: (12348, 23) | POS Test: (416, 23) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 33) 12348 | (416, 33)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6353350371628409\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.32610707607992667, pvalue=0.0006865184287253268)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5025455132061658, pvalue=9.942107946227855e-08)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5067136812262255, pvalue=1.8882108148243214e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5500748646497328, pvalue=2.1171736710397334e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7364 | Kappa = 0.4497 | Precision = 0.6559 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5983 | Kappa = 0.3573 | Precision = 0.4372 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 35 | TMV: ['eventfulness', 'setting', 'agency', 'coh_seq', 'feltness', 'temporal_order', 'concreteness', 'saying', 'temporality', 'agenthood'] | POS: ['cd', '-rrb-', 'nns', 'vbd', '``', 'in', 'cc', 'nnp', 'dt', 'to', 'nn', 'prp', 'vbg', 'rb', \"'\", 'jj', 'pos', 'vb', 'vbp', 'md', 'vbz', 'wdt', 'vbn', '-lrb-', 'rp']\n",
      "POS Train: (12348, 24) | POS Test: (416, 24) | POS feature-columns: ['``', 'cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 34) 12348 | (416, 34)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6254002553490702\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.2830308804089986, pvalue=0.0034384258030347495)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4901912783398816, pvalue=2.2587357903167348e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.4986214305481896, pvalue=3.4318959747925296e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5423517576812619, pvalue=3.9142446584574475e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7318 | Kappa = 0.4403 | Precision = 0.6518 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5983 | Kappa = 0.3573 | Precision = 0.4372 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 36 | TMV: ['eventfulness', 'setting', 'agency', 'coh_seq', 'feltness', 'temporal_order', 'concreteness', 'saying', 'temporality', 'agenthood'] | POS: ['cd', '-rrb-', 'nns', 'vbd', '``', 'in', 'cc', 'nnp', 'dt', 'to', 'nn', 'prp', 'vbg', 'rb', \"'\", 'jj', 'pos', 'vb', 'vbp', 'md', 'wp', 'vbz', 'wdt', 'vbn', '-lrb-', 'rp']\n",
      "POS Train: (12348, 25) | POS Test: (416, 25) | POS feature-columns: ['``', 'cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 35) 12348 | (416, 35)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6279233437636895\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.31038457697381655, pvalue=0.001271219696552237)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4816695129783359, pvalue=3.9052830275477663e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.507861766199549, pvalue=1.7325571354895163e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5504680750368788, pvalue=2.051097199974249e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7356 | Kappa = 0.4536 | Precision = 0.6612 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6011 | Kappa = 0.3643 | Precision = 0.4421 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 37 | TMV: ['eventfulness', 'setting', 'agency', 'coh_seq', 'feltness', 'temporal_order', 'concreteness', 'saying', 'temporality', 'agenthood'] | POS: ['cd', '-rrb-', 'nns', 'vbd', '``', 'in', 'cc', 'nnp', 'dt', 'to', 'nn', 'prp', 'nnps', 'vbg', 'rb', \"'\", 'jj', 'pos', 'vb', 'vbp', 'md', 'wp', 'vbz', 'wdt', 'vbn', '-lrb-', 'rp']\n",
      "POS Train: (12348, 26) | POS Test: (416, 26) | POS feature-columns: ['``', 'cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 36) 12348 | (416, 36)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6337425490800095\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.29814303342304854, pvalue=0.0020079154964501417)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.48885578102542954, pvalue=2.4635280412070626e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5119368880139522, pvalue=1.2733541623722846e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5481272384652613, pvalue=2.4757147379180794e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7477 | Kappa = 0.4775 | Precision = 0.6708 | Recall = 0.8446 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6106 | Kappa = 0.379 | Precision = 0.4486 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 38 | TMV: ['eventfulness', 'setting', 'agency', 'coh_seq', 'feltness', 'temporal_order', 'concreteness', 'saying', 'temporality', 'agenthood'] | POS: ['cd', '-rrb-', 'nns', 'vbd', '``', 'in', 'cc', 'nnp', 'dt', 'to', 'nn', 'prp', 'nnps', 'vbg', 'rb', \"'\", 'jj', 'pos', 'vb', 'vbp', 'md', 'wp', 'vbz', 'wdt', 'vbn', 'wrb', '-lrb-', 'rp']\n",
      "POS Train: (12348, 27) | POS Test: (416, 27) | POS feature-columns: ['``', 'cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 37) 12348 | (416, 37)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.633855497787045\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.29984054300244756, pvalue=0.0018867699247467565)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.49586693554415756, pvalue=1.5555359303505382e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5073271056794028, pvalue=1.8034517712036744e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.552187709780193, pvalue=1.7846569731254468e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.739 | Kappa = 0.4627 | Precision = 0.6667 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6045 | Kappa = 0.3707 | Precision = 0.4458 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 39 | TMV: ['eventfulness', 'setting', 'agency', 'coh_seq', 'feltness', 'temporal_order', 'concreteness', 'saying', 'temporality', 'agenthood'] | POS: ['cd', '-rrb-', 'nns', 'vbd', '``', 'in', 'cc', 'nnp', 'dt', 'to', 'nn', 'prp', 'nnps', 'vbg', 'rb', \"'\", 'jj', 'pos', 'vb', 'vbp', 'md', 'wp', 'vbz', 'wdt', 'vbn', 'wrb', '-lrb-', 'rp', 'ex']\n",
      "POS Train: (12348, 28) | POS Test: (416, 28) | POS feature-columns: ['``', 'cc', 'cd', 'dt', 'ex', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 38) 12348 | (416, 38)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.633671292479684\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3084916349139122, pvalue=0.0013660688849268044)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.49245381927649323, pvalue=1.9482398467809225e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.48922913267217144, pvalue=6.735898234754988e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5577636480557927, pvalue=1.130396612200382e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7414 | Kappa = 0.4634 | Precision = 0.6639 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6089 | Kappa = 0.3758 | Precision = 0.4467 | Recall = 0.9561 | Total Items = 416\n"
     ]
    }
   ],
   "source": [
    "# Top-N feature sweep, part 2 (N = 30..39): same procedure as the 1..29 run, written to a separate TSV.\n",
    "# `process` is defined earlier in the notebook; judging by the header row below it returns the\n",
    "# Spearman correlation plus F1/Kappa at thresholds 2.5 and 3, already formatted as strings.\n",
    "algo = RandomForestClassifier(\n",
    "    n_estimators=500,\n",
    "    max_depth=None,\n",
    "    random_state=seed_value,\n",
    ")  # the best pos-TMV parameters\n",
    "with open('/Users/sunyambagga/Desktop/txtLAB-2/detecting-narrativity/results/WithQuotation_topN_reader_annotated_rf_part2.tsv', 'w') as F:\n",
    "    F.write(\"Top-N\\tSpearman-Correlation\\tF1-2.5\\tKappa-2.5\\tF1-3\\tKappa-3\\n\")\n",
    "    for n in range(30, 40):\n",
    "        print(\"\\n###########################################\\n\")\n",
    "        corr, f12, kap2, f13, kap3 = process(algo, n)\n",
    "        # Row layout mirrors the header: N first, then the five metric strings, tab-separated.\n",
    "        F.write('\\t'.join([str(n), corr, f12, kap2, f13, kap3]) + '\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 5 | TMV: ['agenthood', 'concreteness'] | POS: ['nn', 'vbz', 'vbd']\n",
      "POS Train: (12348, 3) | POS Test: (416, 3) | POS feature-columns: ['nn', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 5) 12348 | (416, 5)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6633684790974758\n",
      "\t POETRY -- SpearmanrResult(correlation=0.524794110456135, pvalue=2.0845771521045134e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.6437485464320079, pvalue=2.923575222142942e-13)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3500371009181058, pvalue=0.00025183458999138966)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6109819831325299, pvalue=1.7237834505774033e-12)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7685 | Kappa = 0.5243 | Precision = 0.6946 | Recall = 0.8601 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6176 | Kappa = 0.3919 | Precision = 0.4561 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 6 | TMV: ['agenthood', 'concreteness'] | POS: ['nn', 'vbz', '-rrb-', 'vbd']\n",
      "POS Train: (12348, 4) | POS Test: (416, 4) | POS feature-columns: ['nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 6) 12348 | (416, 6)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6485996372429937\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5056827869005719, pvalue=8.030014067105563e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.6083854179736156, pvalue=1.1807372263738949e-11)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.345250081349775, pvalue=0.0003097773792207934)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.562659458675316, pvalue=1.9202920958993462e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7506 | Kappa = 0.4824 | Precision = 0.6721 | Recall = 0.8497 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6034 | Kappa = 0.3668 | Precision = 0.4426 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 7 | TMV: ['agenthood', 'concreteness'] | POS: ['vbz', 'nn', '-rrb-', '-lrb-', 'vbd']\n",
      "POS Train: (12348, 5) | POS Test: (416, 5) | POS feature-columns: ['lrb', 'nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 7) 12348 | (416, 7)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6446460030976092\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5051254198807521, pvalue=8.341890208706455e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5979572456917666, pvalue=3.226325407375775e-11)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.34342182629630297, pvalue=0.00033498201805179053)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5398464391444778, pvalue=1.3806910258420017e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7483 | Kappa = 0.4736 | Precision = 0.6653 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5967 | Kappa = 0.3542 | Precision = 0.4355 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 8 | TMV: ['agenthood', 'concreteness'] | POS: ['vbz', 'nn', '-rrb-', 'jj', '-lrb-', 'vbd']\n",
      "POS Train: (12348, 6) | POS Test: (416, 6) | POS feature-columns: ['jj', 'lrb', 'nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 8) 12348 | (416, 8)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6423197582582694\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5149205828415468, pvalue=4.2276509189182765e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5998442852093703, pvalue=2.696969384170744e-11)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.35577061856682124, pvalue=0.00019566151827785224)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5237216744884475, pvalue=5.106936233752704e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7455 | Kappa = 0.4687 | Precision = 0.664 | Recall = 0.8497 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5928 | Kappa = 0.3485 | Precision = 0.4332 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 9 | TMV: ['agenthood', 'concreteness'] | POS: ['in', 'vbz', 'nn', '-rrb-', 'jj', '-lrb-', 'vbd']\n",
      "POS Train: (12348, 7) | POS Test: (416, 7) | POS feature-columns: ['in', 'jj', 'lrb', 'nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 9) 12348 | (416, 9)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6345886147550618\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5060475212155814, pvalue=7.831976907616548e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5582749315533158, pvalue=1.0835826055819495e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3311997646826293, pvalue=0.0005583033424080387)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5162388396835392, pvalue=9.159044377741846e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7385 | Kappa = 0.4585 | Precision = 0.6626 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5994 | Kappa = 0.3611 | Precision = 0.4403 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 10 | TMV: ['agenthood', 'concreteness'] | POS: ['in', 'vbz', 'prp', 'nn', '-rrb-', 'jj', '-lrb-', 'vbd']\n",
      "POS Train: (12348, 8) | POS Test: (416, 8) | POS feature-columns: ['in', 'jj', 'lrb', 'nn', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 10) 12348 | (416, 10)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6344412950442965\n",
      "\t POETRY -- SpearmanrResult(correlation=0.48640994471472726, pvalue=2.88518197432648e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5702841792995436, pvalue=3.9292573728309126e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.2584291889107824, pvalue=0.007772661709363981)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5502424524343419, pvalue=5.723529778417982e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7409 | Kappa = 0.4592 | Precision = 0.6599 | Recall = 0.8446 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5928 | Kappa = 0.3485 | Precision = 0.4332 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 11 | TMV: ['eventfulness', 'agenthood', 'concreteness'] | POS: ['in', 'vbz', 'prp', 'nn', '-rrb-', 'jj', '-lrb-', 'vbd']\n",
      "POS Train: (12348, 8) | POS Test: (416, 8) | POS feature-columns: ['in', 'jj', 'lrb', 'nn', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 11) 12348 | (416, 11)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6441495720992713\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4867373639053057, pvalue=2.8250014240721144e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5907635296727969, pvalue=6.321551291687134e-11)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.322926288072016, pvalue=0.0007797361519771653)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5476247578637644, pvalue=7.16462261495958e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7431 | Kappa = 0.468 | Precision = 0.6667 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5994 | Kappa = 0.3611 | Precision = 0.4403 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 12 | TMV: ['eventfulness', 'agenthood', 'concreteness'] | POS: ['in', 'dt', 'vbz', 'prp', 'nn', '-rrb-', 'jj', '-lrb-', 'vbd']\n",
      "POS Train: (12348, 9) | POS Test: (416, 9) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 12) 12348 | (416, 12)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6411340460706821\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5006721637035071, pvalue=1.128306970038834e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5813244721502424, pvalue=1.4906729329216535e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.29607909602497623, pvalue=0.0021646627291899393)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5502171696162111, pvalue=5.736010716770429e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7465 | Kappa = 0.4771 | Precision = 0.6722 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6028 | Kappa = 0.3675 | Precision = 0.444 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 13 | TMV: ['eventfulness', 'agenthood', 'concreteness'] | POS: ['in', 'dt', 'vbz', 'prp', 'nn', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd']\n",
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 13) 12348 | (416, 13)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6325065187461679\n",
      "\t POETRY -- SpearmanrResult(correlation=0.47533354579745396, pvalue=5.811972593724585e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.566228755201822, pvalue=5.559758299090293e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.27170969732954264, pvalue=0.005050113948731508)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.545287224355872, pvalue=8.741332040950457e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7397 | Kappa = 0.4589 | Precision = 0.6612 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5961 | Kappa = 0.3548 | Precision = 0.4367 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 14 | TMV: ['eventfulness', 'setting', 'concreteness', 'agenthood'] | POS: ['in', 'dt', 'vbz', 'prp', 'nn', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 14) 12348 | (416, 14)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6378118788357315\n",
      "\t POETRY -- SpearmanrResult(correlation=0.49762442452980155, pvalue=1.3839604432676683e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5611665768749515, pvalue=8.518958764225752e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.30627819597300315, pvalue=0.0014851061305255107)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.554098775669108, pvalue=4.0967939074703343e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.738 | Kappa = 0.4543 | Precision = 0.6585 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6 | Kappa = 0.3605 | Precision = 0.439 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 15 | TMV: ['agency', 'eventfulness', 'setting', 'agenthood', 'concreteness'] | POS: ['in', 'dt', 'vbz', 'prp', 'nn', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd']\n",
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 15) 12348 | (416, 15)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6297059419981168\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4879608325823236, pvalue=2.610516266528885e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.548768372159696, pvalue=2.351701843863128e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.281442559679427, pvalue=0.003632392753315406)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5374020842122748, pvalue=1.6909849364634643e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7364 | Kappa = 0.4497 | Precision = 0.6559 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5983 | Kappa = 0.3573 | Precision = 0.4372 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 16 | TMV: ['agency', 'temporality', 'eventfulness', 'setting', 'agenthood', 'concreteness'] | POS: ['in', 'dt', 'vbz', 'prp', 'nn', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd']\n",
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 16) 12348 | (416, 16)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6363908634913639\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4773727459258151, pvalue=5.118333403305606e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5817989375124437, pvalue=1.428693369115281e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.2879239483660959, pvalue=0.0028978988764318814)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5535155730926608, pvalue=4.310473359707947e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.746 | Kappa = 0.4729 | Precision = 0.668 | Recall = 0.8446 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6034 | Kappa = 0.3668 | Precision = 0.4426 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 17 | TMV: ['agency', 'temporality', 'eventfulness', 'setting', 'agenthood', 'concreteness'] | POS: ['in', 'dt', 'vbz', 'prp', 'nn', 'vbp', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd']\n",
      "POS Train: (12348, 11) | POS Test: (416, 11) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 17) 12348 | (416, 17)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.641938207942884\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5045300463048117, pvalue=8.687773275672571e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.573662850250846, pvalue=2.9318996643729776e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3166688855656848, pvalue=0.000997690293289081)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5430811050442114, pvalue=1.053172640140816e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7477 | Kappa = 0.4775 | Precision = 0.6708 | Recall = 0.8446 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.605 | Kappa = 0.37 | Precision = 0.4444 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 18 | TMV: ['agency', 'temporality', 'eventfulness', 'setting', 'agenthood', 'concreteness'] | POS: ['vbn', 'in', 'vbz', 'prp', 'dt', 'nn', 'vbp', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd']\n",
      "POS Train: (12348, 12) | POS Test: (416, 12) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 18) 12348 | (416, 18)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6358834892296603\n",
      "\t POETRY -- SpearmanrResult(correlation=0.47330504094156106, pvalue=6.589858421497291e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.576554086281215, pvalue=2.2760495740303916e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.320939727972601, pvalue=0.000843685171510049)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5286304423003895, pvalue=3.4543823050059003e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7419 | Kappa = 0.4676 | Precision = 0.668 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6085 | Kappa = 0.3764 | Precision = 0.4481 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 19 | TMV: ['agency', 'temporality', 'eventfulness', 'setting', 'agenthood', 'concreteness'] | POS: ['vbn', 'vbz', 'in', 'prp', 'dt', 'vbp', 'nn', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd', 'cc']\n",
      "POS Train: (12348, 13) | POS Test: (416, 13) | POS feature-columns: ['cc', 'dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 19) 12348 | (416, 19)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6388565588983673\n",
      "\t POETRY -- SpearmanrResult(correlation=0.47768701142874004, pvalue=5.018692194565438e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5623593478921722, pvalue=7.70901174352504e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3473990661618559, pvalue=0.0002823925354377761)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5283403936803143, pvalue=3.5357232412042515e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.739 | Kappa = 0.4627 | Precision = 0.6667 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6102 | Kappa = 0.3797 | Precision = 0.45 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 20 | TMV: ['agency', 'temporality', 'eventfulness', 'feltness', 'setting', 'agenthood', 'concreteness'] | POS: ['vbn', 'vbz', 'in', 'prp', 'dt', 'vbp', 'nn', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd', 'cc']\n",
      "POS Train: (12348, 13) | POS Test: (416, 13) | POS feature-columns: ['cc', 'dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 20) 12348 | (416, 20)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6431892595814256\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5043077562527668, pvalue=8.820383670637726e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5798910870558194, pvalue=1.6940302506483068e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.31925322875235873, pvalue=0.0009016981313228031)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5342429887650637, pvalue=2.192256613232734e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7436 | Kappa = 0.4722 | Precision = 0.6708 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6102 | Kappa = 0.3797 | Precision = 0.45 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 21 | TMV: ['agency', 'temporality', 'eventfulness', 'feltness', 'setting', 'agenthood', 'concreteness'] | POS: ['vbn', 'vbz', 'in', 'prp', 'dt', 'vbp', 'nn', '-rrb-', 'cd', 'nns', 'jj', '-lrb-', 'vbd', 'cc']\n",
      "POS Train: (12348, 14) | POS Test: (416, 14) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 21) 12348 | (416, 21)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6432602773275637\n",
      "\t POETRY -- SpearmanrResult(correlation=0.48734277970124557, pvalue=2.7168540703609094e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5718875797027683, pvalue=3.4209258227033716e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3372413097134239, pvalue=0.0004348531842097849)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.548819407702421, pvalue=6.46826224525835e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7512 | Kappa = 0.4866 | Precision = 0.6763 | Recall = 0.8446 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6085 | Kappa = 0.3764 | Precision = 0.4481 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 22 | TMV: ['agency', 'temporality', 'eventfulness', 'feltness', 'setting', 'agenthood', 'concreteness'] | POS: ['vbn', 'vbz', 'in', 'prp', 'dt', 'nnp', 'vbp', 'nn', '-rrb-', 'cd', 'nns', 'jj', '-lrb-', 'vbd', 'cc']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 15) | POS Test: (416, 15) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 22) 12348 | (416, 22)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6387331662917587\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4877115928452801, pvalue=2.652913566121235e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.567057822592276, pvalue=5.180887874871349e-10)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.30792698940424784, pvalue=0.0013955816481336504)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5267678761014382, pvalue=4.0097136996624794e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7344 | Kappa = 0.4532 | Precision = 0.6625 | Recall = 0.8238 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6045 | Kappa = 0.3707 | Precision = 0.4458 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 23 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'agenthood', 'concreteness'] | POS: ['vbn', 'vbz', 'in', 'prp', 'dt', 'nnp', 'vbp', 'nn', '-rrb-', 'cd', 'nns', 'jj', '-lrb-', 'vbd', 'cc']\n",
      "POS Train: (12348, 15) | POS Test: (416, 15) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 23) 12348 | (416, 23)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6326636318818734\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4851247177016516, pvalue=3.133399721704537e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5440637623426265, pvalue=3.420319847188131e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.33374949000881143, pvalue=0.0005027322910322263)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5233796270634167, pvalue=5.246760011300557e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7385 | Kappa = 0.4585 | Precision = 0.6626 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.605 | Kappa = 0.37 | Precision = 0.4444 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 24 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'agenthood', 'concreteness'] | POS: ['vbn', 'vbz', 'in', 'prp', 'dt', 'nnp', 'vbp', 'nn', '-rrb-', 'cd', 'wdt', 'nns', 'jj', '-lrb-', 'vbd', 'cc']\n",
      "POS Train: (12348, 16) | POS Test: (416, 16) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 24) 12348 | (416, 24)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6377019234219841\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4954417756513167, pvalue=1.5999853957895684e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5494347569531083, pvalue=2.229128103384019e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.34818713155862324, pvalue=0.0002729232250412027)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5259236336254632, pvalue=4.288749718818991e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7407 | Kappa = 0.4673 | Precision = 0.6695 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6062 | Kappa = 0.3739 | Precision = 0.4477 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 25 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'agenthood', 'concreteness'] | POS: ['vbn', 'to', 'in', 'vbz', 'prp', 'dt', 'nnp', 'vbp', 'nn', '-rrb-', 'cd', 'wdt', 'nns', 'jj', '-lrb-', 'vbd', 'cc']\n",
      "POS Train: (12348, 17) | POS Test: (416, 17) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 25) 12348 | (416, 25)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6375734283880433\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4956215866417704, pvalue=1.581041016937434e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5557377672262617, pvalue=1.3357025253355671e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3375700953891498, pvalue=0.00042891684463493057)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.523291490634672, pvalue=5.283379397329209e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7436 | Kappa = 0.4722 | Precision = 0.6708 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6102 | Kappa = 0.3797 | Precision = 0.45 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 26 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'agenthood', 'concreteness'] | POS: ['md', 'to', 'vbn', 'in', 'vbz', 'prp', 'dt', 'nnp', 'vbp', 'nn', '-rrb-', 'cd', 'wdt', 'nns', 'jj', '-lrb-', 'vbd', 'cc']\n",
      "POS Train: (12348, 18) | POS Test: (416, 18) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 26) 12348 | (416, 26)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6386125306408384\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5014814811634375, pvalue=1.0683876000730773e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5574700247943023, pvalue=1.158152563289282e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3241926117013122, pvalue=0.0007413190312656566)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5127315245256204, pvalue=1.1985764085691441e-08)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7378 | Kappa = 0.4623 | Precision = 0.6681 | Recall = 0.8238 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.608 | Kappa = 0.3771 | Precision = 0.4496 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 27 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'agenthood', 'concreteness'] | POS: ['to', 'in', 'vbp', '-rrb-', 'md', 'vbz', 'prp', 'nnp', 'nn', 'wdt', '-lrb-', 'dt', 'rp', 'nns', 'jj', 'vbd', 'cc', 'vbn', 'cd']\n",
      "POS Train: (12348, 19) | POS Test: (416, 19) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 27) 12348 | (416, 27)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6373605314048216\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5088242412570391, pvalue=6.469827021717848e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5453541164429728, pvalue=3.0881329900656703e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.31660395457696316, pvalue=0.001000217845841662)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5154715288951872, pvalue=9.716713269515564e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7442 | Kappa = 0.4764 | Precision = 0.6751 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6097 | Kappa = 0.3804 | Precision = 0.4515 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 28 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'agenthood', 'concreteness', 'temporal_order'] | POS: ['to', 'in', 'vbp', '-rrb-', 'md', 'vbz', 'prp', 'nnp', 'nn', 'wdt', '-lrb-', 'dt', 'rp', 'nns', 'jj', 'vbd', 'cc', 'vbn', 'cd']\n",
      "POS Train: (12348, 19) | POS Test: (416, 19) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 28) 12348 | (416, 28)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6350052679738748\n",
      "\t POETRY -- SpearmanrResult(correlation=0.49318442008843016, pvalue=1.856970687083262e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5561257744243752, pvalue=1.2937959030762871e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.323148358339191, pvalue=0.0007728701247501442)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5078329603761892, pvalue=1.736307733098417e-08)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7373 | Kappa = 0.4581 | Precision = 0.6639 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6085 | Kappa = 0.3764 | Precision = 0.4481 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 29 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'agenthood', 'concreteness', 'temporal_order'] | POS: ['to', 'in', 'vbp', '-rrb-', 'vb', 'md', 'vbz', 'prp', 'nnp', 'nn', 'wdt', '-lrb-', 'dt', 'rp', 'nns', 'jj', 'vbd', 'cc', 'vbn', 'cd']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 20) | POS Test: (416, 20) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 29) 12348 | (416, 29)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6312171878459463\n",
      "\t POETRY -- SpearmanrResult(correlation=0.48317603275936344, pvalue=3.548808231941455e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5472847810284079, pvalue=2.64823078586168e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.31017446862471, pvalue=0.0012814436996149176)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5049683498545482, pvalue=2.1507343570680853e-08)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7419 | Kappa = 0.4676 | Precision = 0.668 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6085 | Kappa = 0.3764 | Precision = 0.4481 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 30 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'saying', 'agenthood', 'concreteness', 'temporal_order'] | POS: ['to', 'in', 'vbp', '-rrb-', 'vb', 'md', 'vbz', 'prp', 'nnp', 'nn', 'wdt', '-lrb-', 'dt', 'rp', 'nns', 'jj', 'vbd', 'cc', 'vbn', 'cd']\n",
      "POS Train: (12348, 20) | POS Test: (416, 20) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 30) 12348 | (416, 30)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6284178893909825\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4887468120178135, pvalue=2.48099511396294e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5420049944665466, pvalue=4.02229191962107e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3113563028250228, pvalue=0.0012248931999382347)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5005117032216414, pvalue=2.988975299815357e-08)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7306 | Kappa = 0.4399 | Precision = 0.6531 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6017 | Kappa = 0.3637 | Precision = 0.4408 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 31 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'saying', 'agenthood', 'concreteness', 'temporal_order'] | POS: ['to', 'in', 'vbp', '-rrb-', 'vb', 'md', 'vbz', 'prp', 'nnp', 'nn', 'wdt', '-lrb-', 'dt', 'rp', 'nns', 'rb', 'jj', 'vbd', 'cc', 'vbn', 'cd']\n",
      "POS Train: (12348, 21) | POS Test: (416, 21) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 31) 12348 | (416, 31)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6385980729071312\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5012556657629477, pvalue=1.08479413425445e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5524292115626707, pvalue=1.7500096671327805e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3217419682186413, pvalue=0.0008173045815268794)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5197391381117702, pvalue=6.981412776473809e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7419 | Kappa = 0.4676 | Precision = 0.668 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6085 | Kappa = 0.3764 | Precision = 0.4481 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 32 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'saying', 'agenthood', 'concreteness', 'temporal_order'] | POS: ['to', 'in', 'vbp', '-rrb-', 'vb', 'md', 'vbz', 'prp', 'pos', 'nnp', 'nn', 'wdt', '-lrb-', 'dt', 'rp', 'nns', 'rb', 'jj', 'vbd', 'cc', 'vbn', 'cd']\n",
      "POS Train: (12348, 22) | POS Test: (416, 22) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 32) 12348 | (416, 32)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6276655108202905\n",
      "\t POETRY -- SpearmanrResult(correlation=0.48432101155026597, pvalue=3.298795445362124e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5396647897529971, pvalue=4.8298305172509995e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.31224386592048103, pvalue=0.0011839271742409077)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5047538189990367, pvalue=2.1853158195979338e-08)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7335 | Kappa = 0.4448 | Precision = 0.6545 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6 | Kappa = 0.3605 | Precision = 0.439 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 33 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'saying', 'agenthood', 'concreteness', 'temporal_order'] | POS: ['to', 'in', 'vbp', '-rrb-', 'vb', 'md', 'vbz', 'prp', 'pos', 'nnp', 'nn', 'wdt', '-lrb-', 'dt', 'rp', 'vbg', 'nns', 'rb', 'jj', 'vbd', 'cc', 'vbn', 'cd']\n",
      "POS Train: (12348, 23) | POS Test: (416, 23) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 33) 12348 | (416, 33)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6306111706452205\n",
      "\t POETRY -- SpearmanrResult(correlation=0.49506982174077274, pvalue=1.6398614131667124e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.552169755594358, pvalue=1.7872589043524208e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.29809773778372645, pvalue=0.0020112423198888536)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.4896267781558687, pvalue=6.54898342075732e-08)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7448 | Kappa = 0.4726 | Precision = 0.6694 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6067 | Kappa = 0.3732 | Precision = 0.4463 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 34 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'saying', 'agenthood', 'concreteness', 'temporal_order'] | POS: ['to', 'in', 'vbp', '-rrb-', 'vb', 'md', 'vbz', 'prp', 'pos', 'nnp', 'nn', 'wdt', '-lrb-', 'dt', 'rp', 'vbg', 'nns', 'rb', 'jj', 'wp', 'vbd', 'cc', 'vbn', 'cd']\n",
      "POS Train: (12348, 24) | POS Test: (416, 24) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 34) 12348 | (416, 34)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6280817180721847\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4948261329722557, pvalue=1.6664974809323804e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5461150796527017, pvalue=2.9069735611764825e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.28800433433932177, pvalue=0.002889695934368703)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.504522996129512, pvalue=2.2231174591790863e-08)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7335 | Kappa = 0.4448 | Precision = 0.6545 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5944 | Kappa = 0.3516 | Precision = 0.435 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 35 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'saying', 'agenthood', 'concreteness', 'temporal_order'] | POS: ['to', 'in', 'wrb', 'vbp', '-rrb-', 'vb', 'md', 'vbz', 'prp', 'pos', 'nnp', 'nn', 'wdt', '-lrb-', 'dt', 'rp', 'vbg', 'nns', 'rb', 'jj', 'wp', 'vbd', 'cc', 'vbn', 'cd']\n",
      "POS Train: (12348, 25) | POS Test: (416, 25) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 35) 12348 | (416, 35)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6328291270950586\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4910527373802201, pvalue=2.1353404814836465e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5495928347443545, pvalue=2.200963867543027e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.32754222557312695, pvalue=0.0006478990696578834)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5101464284473549, pvalue=1.458561951963288e-08)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7273 | Kappa = 0.4308 | Precision = 0.6478 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5928 | Kappa = 0.3485 | Precision = 0.4332 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 36 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'saying', 'agenthood', 'concreteness', 'temporal_order'] | POS: ['to', 'in', 'nnps', 'wrb', 'vbp', '-rrb-', 'vb', 'md', 'vbz', 'prp', 'pos', 'nnp', 'nn', 'wdt', '-lrb-', 'dt', 'rp', 'vbg', 'nns', 'rb', 'jj', 'wp', 'vbd', 'cc', 'vbn', 'cd']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 26) | POS Test: (416, 26) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 36) 12348 | (416, 36)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6309990648105284\n",
      "\t POETRY -- SpearmanrResult(correlation=0.49445402330652116, pvalue=1.7079663054470936e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5472781477332784, pvalue=2.6496338128457394e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3268018639932884, pvalue=0.0006675661055919964)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.499776450821974, pvalue=3.154336893862668e-08)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7414 | Kappa = 0.4634 | Precision = 0.6639 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6089 | Kappa = 0.3758 | Precision = 0.4467 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 37 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'saying', 'agenthood', 'concreteness', 'temporal_order'] | POS: ['ex', 'to', 'in', 'nnps', 'wrb', 'vbp', '-rrb-', 'vb', 'md', 'vbz', 'prp', 'pos', 'nnp', 'nn', 'wdt', '-lrb-', 'dt', 'rp', 'vbg', 'nns', 'rb', 'jj', 'wp', 'vbd', 'cc', 'vbn', 'cd']\n",
      "POS Train: (12348, 27) | POS Test: (416, 27) | POS feature-columns: ['cc', 'cd', 'dt', 'ex', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 37) 12348 | (416, 37)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6305715144920001\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5044607247953452, pvalue=8.728922781354124e-08)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5448274493239254, pvalue=3.2197998314934736e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.28616217916978726, pvalue=0.0030830171950280206)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.49903076506812294, pvalue=3.330960550496822e-08)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7368 | Kappa = 0.4539 | Precision = 0.6598 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6034 | Kappa = 0.3668 | Precision = 0.4426 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 38 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'saying', 'agenthood', 'concreteness', 'temporal_order'] | POS: ['ex', 'to', 'in', 'nnps', 'wrb', 'vbp', '-rrb-', 'vb', 'md', 'vbz', 'prp', 'pos', 'nnp', 'nn', 'wdt', '-lrb-', 'dt', 'rp', 'vbg', 'nns', 'jjs', 'rb', 'jj', 'wp', 'vbd', 'cc', 'vbn', 'cd']\n",
      "POS Train: (12348, 28) | POS Test: (416, 28) | POS feature-columns: ['cc', 'cd', 'dt', 'ex', 'in', 'jj', 'jjs', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 38) 12348 | (416, 38)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6345708380051628\n",
      "\t POETRY -- SpearmanrResult(correlation=0.48718678111404595, pvalue=2.7443378685237394e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5478466755202519, pvalue=2.531934318538813e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.35101850094055165, pvalue=0.00024126852619712946)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.520973128531263, pvalue=6.339540434088044e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7339 | Kappa = 0.449 | Precision = 0.6584 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5994 | Kappa = 0.3611 | Precision = 0.4403 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 39 | TMV: ['agency', 'coh_seq', 'temporality', 'eventfulness', 'feltness', 'setting', 'saying', 'agenthood', 'concreteness', 'temporal_order'] | POS: ['ex', 'to', 'in', 'nnps', 'wrb', 'vbp', '-rrb-', 'vb', 'md', 'vbz', 'prp', 'pos', 'nnp', 'nn', 'wdt', '-lrb-', 'dt', 'rp', 'vbg', 'nns', 'jjs', 'rb', 'jj', 'wp', 'vbd', 'cc', 'vbn', 'jjr', 'cd']\n",
      "POS Train: (12348, 29) | POS Test: (416, 29) | POS feature-columns: ['cc', 'cd', 'dt', 'ex', 'in', 'jj', 'jjr', 'jjs', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 39) 12348 | (416, 39)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6298541980524947\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4946716963573461, pvalue=1.683590595508696e-07)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5437043151826336, pvalue=3.518791135840583e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3016872730639859, pvalue=0.0017625201745211727)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5005905608526293, pvalue=2.9717404426932763e-08)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7431 | Kappa = 0.468 | Precision = 0.6667 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6106 | Kappa = 0.379 | Precision = 0.4486 | Recall = 0.9561 | Total Items = 416\n"
     ]
    }
   ],
   "source": [
    "# Random-Forest sweep over the top-N feature sets (N = 5..39): one TSV row of scores per N.\n",
    "algo = RandomForestClassifier(random_state=seed_value, max_depth=None, n_estimators=500) # the best pos-TMV parameters\n",
    "rf_header = \"Top-N\\tSpearman-Correlation\\tF1-2.5\\tKappa-2.5\\tF1-3\\tKappa-3\\n\"\n",
    "with open('/Users/sunyambagga/Desktop/txtLAB-2/detecting-narrativity/results/topN_reader_annotated_rf.tsv', 'w') as F:\n",
    "    F.write(rf_header)\n",
    "    for n in range(5,40):\n",
    "        print(\"\\n###########################################\\n\")\n",
    "        corr, f12, kap2, f13, kap3 = process(algo, n)\n",
    "        # process() hands back the scores already formatted as strings; join them into one row\n",
    "        F.write('\\t'.join([str(n), corr, f12, kap2, f13, kap3])+'\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 1 | TMV: ['agenthood'] | POS: []\n",
      "Train files: 12348 | Annotated files: 416 (12348, 1) 12348 | (416, 1)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.45132478848495694\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.286731442766798, pvalue=0.0025045604414793998)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.4819275990047986, pvalue=2.9217483120943107e-07)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.14036696331255205, pvalue=0.16364133979008577)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.03474379538115243, pvalue=0.7249413679251852)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6573 | Kappa = 0.3001 | Precision = 0.5975 | Recall = 0.7306 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5143 | Kappa = 0.2296 | Precision = 0.3814 | Recall = 0.7895 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 2 | TMV: ['agenthood'] | POS: ['vbd']\n",
      "POS Train: (12348, 1) | POS Test: (416, 1) | POS feature-columns: ['vbd']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 2) 12348 | (416, 2)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.5672853159834677\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.4075232529370036, pvalue=1.0920342803127963e-05)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5154243794803465, pvalue=2.967602039519219e-08)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.3498872641692305, pvalue=0.00035921220519631255)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.26461379219423764, pvalue=0.0063752365915842515)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7224 | Kappa = 0.4579 | Precision = 0.6869 | Recall = 0.7617 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5854 | Kappa = 0.3546 | Precision = 0.4486 | Recall = 0.8421 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 3 | TMV: ['agenthood'] | POS: ['nn', 'vbd']\n",
      "POS Train: (12348, 2) | POS Test: (416, 2) | POS feature-columns: ['nn', 'vbd']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 3) 12348 | (416, 3)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.5924932994627192\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.563582062287436, pvalue=1.7674211019850558e-10)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5422113244399479, pvalue=3.957662526762997e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.4181195453776211, pvalue=1.500238007969895e-05)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.2327479646768043, pvalue=0.016879629557900247)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7351 | Kappa = 0.4697 | Precision = 0.6814 | Recall = 0.7979 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5941 | Kappa = 0.3615 | Precision = 0.4469 | Recall = 0.886 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 4 | TMV: ['agenthood'] | POS: ['nn', 'vbz', 'vbd']\n",
      "POS Train: (12348, 3) | POS Test: (416, 3) | POS feature-columns: ['nn', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 4) 12348 | (416, 4)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.5870521934586094\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5535689504172977, pvalue=4.2904791465199167e-10)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5539904991156864, pvalue=1.5411055048365845e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.35215043283042147, pvalue=0.0003268398987989468)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.2934851651522106, pvalue=0.0023772986589195816)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7285 | Kappa = 0.4433 | Precision = 0.6597 | Recall = 0.8135 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5795 | Kappa = 0.332 | Precision = 0.4286 | Recall = 0.8947 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 5 | TMV: ['agenthood', 'concreteness'] | POS: ['nn', 'vbz', 'vbd']\n",
      "POS Train: (12348, 3) | POS Test: (416, 3) | POS feature-columns: ['nn', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 5) 12348 | (416, 5)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6633684790974758\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6109819831325299, pvalue=1.7237834505774033e-12)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.6437485464320079, pvalue=2.923575222142942e-13)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.524794110456135, pvalue=2.0845771521045134e-08)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3500371009181058, pvalue=0.00025183458999138966)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7685 | Kappa = 0.5243 | Precision = 0.6946 | Recall = 0.8601 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6176 | Kappa = 0.3919 | Precision = 0.4561 | Recall = 0.9561 | Total Items = 416\n"
     ]
    }
   ],
   "source": [
    "# Random-Forest sweep for the smallest feature sets (N = 1..5).\n",
    "# Build the classifier here instead of reusing whatever `algo` the last-run cell left bound\n",
    "# (the SVM cell rebinds the same name), so rows in the *_rf.tsv file always come from the RF.\n",
    "algo = RandomForestClassifier(n_estimators=500, max_depth=None, random_state=seed_value)\n",
    "rf_results_path = '/Users/sunyambagga/Desktop/txtLAB-2/detecting-narrativity/results/topN_reader_annotated_rf.tsv'\n",
    "# The N = 5..39 sweep writes to this same file; opening it with 'w' here would erase those rows,\n",
    "# so append instead and only emit the header when the file is new or empty.\n",
    "# NOTE: N = 5 is covered by both sweeps and re-running this cell appends again --\n",
    "# de-duplicate on the Top-N column when reading the file back.\n",
    "needs_header = not os.path.exists(rf_results_path) or os.path.getsize(rf_results_path) == 0\n",
    "with open(rf_results_path, 'a') as F:\n",
    "    if needs_header:\n",
    "        F.write(\"Top-N\\tSpearman-Correlation\\tF1-2.5\\tKappa-2.5\\tF1-3\\tKappa-3\\n\")\n",
    "    for n in range(1,6):\n",
    "        print(\"\\n###########################################\\n\")\n",
    "        corr, f12, kap2, f13, kap3 = process(algo, n)\n",
    "        F.write(str(n)+'\\t'+corr+'\\t'+f12+'\\t'+kap2+'\\t'+f13+'\\t'+kap3+'\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 1 | TMV: ['agenthood'] | POS: []\n",
      "Train files: 12348 | Annotated files: 416 (12348, 1) 12348 | (416, 1)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.5247462368850091\n",
      "NEG 0.5247462368850091\n",
      "NEG 0.5091209542993604\n",
      "NEG 0.5091209542993604\n",
      "NEG 0.5247462368850091\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.5362947671231553\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5835058269478709, pvalue=1.2255848961440741e-10)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5406030411480236, pvalue=1.2962881361711436e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.0221433345490806, pvalue=0.8225915528442072)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.24752592414847577, pvalue=0.01303154838865893)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6861 | Kappa = 0.382 | Precision = 0.6468 | Recall = 0.7306 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5542 | Kappa = 0.3036 | Precision = 0.422 | Recall = 0.807 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 2 | TMV: ['agenthood'] | POS: ['vbd']\n",
      "POS Train: (12348, 1) | POS Test: (416, 1) | POS feature-columns: ['vbd']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 2) 12348 | (416, 2)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS 0.4877454890544895\n",
      "POS 0.48668105951998614\n",
      "POS 0.48708178419319054\n",
      "POS 0.5\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6997408563418355\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.6590950453769762, pvalue=5.03216494894305e-14)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.706968803986231, pvalue=8.647524071124487e-18)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.3577386000155095, pvalue=0.0001792263944685635)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5662042353383542, pvalue=8.279951418589108e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7871 | Kappa = 0.587 | Precision = 0.7536 | Recall = 0.8238 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6523 | Kappa = 0.4602 | Precision = 0.5024 | Recall = 0.9298 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 3 | TMV: ['agenthood'] | POS: ['nn', 'vbd']\n",
      "POS Train: (12348, 2) | POS Test: (416, 2) | POS feature-columns: ['nn', 'vbd']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 3) 12348 | (416, 3)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.5524354703378577\n",
      "NEG 0.5407078787722526\n",
      "NEG 0.5321794585444727\n",
      "NEG 0.5175944101373702\n",
      "NEG 0.5124250402921062\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4235082833545034\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.38212926734176705, pvalue=7.393488318187296e-05)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.44605095570033937, pvalue=1.1681789972426913e-06)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.0586575752232688, pvalue=0.5522590382733177)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.2808060027033539, pvalue=0.004655810228096464)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6885 | Kappa = 0.3261 | Precision = 0.594 | Recall = 0.8187 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5368 | Kappa = 0.2485 | Precision = 0.3835 | Recall = 0.8947 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 4 | TMV: ['agenthood'] | POS: ['vbz', 'nn', 'vbd']\n",
      "POS Train: (12348, 3) | POS Test: (416, 3) | POS feature-columns: ['nn', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 4) 12348 | (416, 4)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.5169220492649483\n",
      "NEG 0.5451691033007475\n",
      "NEG 0.530006935662479\n",
      "NEG 0.5401908551092675\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.37599514948479384\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.39593339448810155, pvalue=3.803673518284691e-05)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.30945336792107064, pvalue=0.0010592392666173218)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.16604087274738732, pvalue=0.09049010328635576)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.3211030001189897, pvalue=0.0011244898537303186)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6916 | Kappa = 0.3391 | Precision = 0.6015 | Recall = 0.8135 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5493 | Kappa = 0.2714 | Precision = 0.3946 | Recall = 0.9035 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 5 | TMV: ['agenthood', 'concreteness'] | POS: ['vbz', 'nn', 'vbd']\n",
      "POS Train: (12348, 3) | POS Test: (416, 3) | POS feature-columns: ['nn', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 5) 12348 | (416, 5)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.507248461492755\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.5120007500956543\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5950528991267806, pvalue=4.241453394153762e-11)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5022836402495048, pvalue=2.6238345587190775e-08)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.19532298324345945, pvalue=0.045848750489286966)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.34243140644000564, pvalue=0.0004878945160711805)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7333 | Kappa = 0.4327 | Precision = 0.642 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5714 | Kappa = 0.3091 | Precision = 0.4125 | Recall = 0.9298 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 6 | TMV: ['agenthood', 'concreteness'] | POS: ['vbz', 'nn', '-rrb-', 'vbd']\n",
      "POS Train: (12348, 4) | POS Test: (416, 4) | POS feature-columns: ['nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 6) 12348 | (416, 6)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.5147374436005806\n",
      "NEG 0.5102662089794815\n",
      "NEG 0.5113111823390335\n",
      "NEG 0.5149507462757188\n",
      "NEG 0.5109068048265661\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.48333316789960634\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5625971145918714, pvalue=7.556648512551252e-10)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.4320045645465362, pvalue=2.724563947423519e-06)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.20426943039289702, pvalue=0.03660201986930314)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.34838798838551466, pvalue=0.0003822563451656669)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7221 | Kappa = 0.4011 | Precision = 0.625 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5556 | Kappa = 0.2799 | Precision = 0.3977 | Recall = 0.9211 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 7 | TMV: ['agenthood', 'concreteness'] | POS: ['nn', 'vbz', '-rrb-', 'vbd', '-lrb-']\n",
      "POS Train: (12348, 5) | POS Test: (416, 5) | POS feature-columns: ['lrb', 'nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 7) 12348 | (416, 7)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.5102985620533675\n",
      "NEG 0.5154937212664145\n",
      "NEG 0.5247733022713151\n",
      "NEG 0.5257638696598916\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.483106375323336\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5657436562137389, pvalue=5.793638614116097e-10)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.41288435135614376, pvalue=8.132138740202208e-06)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.18062487818751394, pvalue=0.06519832394062415)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.3481658174863374, pvalue=0.00038578459656697585)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7165 | Kappa = 0.3912 | Precision = 0.6221 | Recall = 0.8446 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5585 | Kappa = 0.2857 | Precision = 0.4008 | Recall = 0.9211 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 8 | TMV: ['agenthood', 'concreteness'] | POS: ['nn', 'vbz', '-rrb-', 'vbd', 'jj', '-lrb-']\n",
      "POS Train: (12348, 6) | POS Test: (416, 6) | POS feature-columns: ['jj', 'lrb', 'nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 8) 12348 | (416, 8)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.5216560690885048\n",
      "NEG 0.5221249783375267\n",
      "NEG 0.5116717166075585\n",
      "NEG 0.5246406858006702\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.44261639629296295\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.49794587090803516, pvalue=1.0090633969781554e-07)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.3786331009442423, pvalue=4.913744527499613e-05)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.0960770628656989, pvalue=0.329575255115546)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.31298475537067505, pvalue=0.0015214822321995248)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.692 | Kappa = 0.3472 | Precision = 0.6078 | Recall = 0.8031 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5312 | Kappa = 0.2453 | Precision = 0.3843 | Recall = 0.8596 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 9 | TMV: ['agenthood', 'concreteness'] | POS: ['nn', 'vbz', 'in', '-rrb-', 'vbd', 'jj', '-lrb-']\n",
      "POS Train: (12348, 7) | POS Test: (416, 7) | POS feature-columns: ['in', 'jj', 'lrb', 'nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 9) 12348 | (416, 9)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.532872685735076\n",
      "NEG 0.5403131106842153\n",
      "NEG 0.5520463482191585\n",
      "NEG 0.6018808964953642\n",
      "NEG 0.5446012114750838\n",
      "NEG 0.5711379280476311\n",
      "NEG 0.6023641696810905\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.3754260939064395\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.28716153772804837, pvalue=0.003429718663323328)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.18950441282879527, pvalue=0.04842764980412901)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.07705873144715543, pvalue=0.43460979088358376)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.22978475593824327, pvalue=0.021458228939543005)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6435 | Kappa = 0.2675 | Precision = 0.5816 | Recall = 0.7202 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5212 | Kappa = 0.2388 | Precision = 0.3849 | Recall = 0.807 | Total Items = 416\n"
     ]
    }
   ],
   "source": [
    "## Run for SVM\n",
    "\n",
    "# gamma='auto' is the default this run actually used (sklearn < 0.22, see the FutureWarning);\n",
    "# pinning it silences the warning and keeps results unchanged once the default becomes 'scale'.\n",
    "# random_state seeds the data shuffling behind probability=True so reruns are reproducible,\n",
    "# in line with the notebook-wide seed.\n",
    "algo = SVC(probability=True, gamma='auto', random_state=seed_value)\n",
    "with open('/Users/sunyambagga/Desktop/txtLAB-2/detecting-narrativity/results/topN_reader_annotated_svm.tsv', 'w') as F:\n",
    "    F.write(\"Top-N\\tSpearman-Correlation\\tF1-2.5\\tKappa-2.5\\tF1-3\\tKappa-3\\n\")\n",
    "    for n in range(1,10):\n",
    "        print(\"\\n###########################################\\n\")\n",
    "        corr, f12, kap2, f13, kap3 = process(algo, n)\n",
    "        F.write(str(n)+'\\t'+corr+'\\t'+f12+'\\t'+kap2+'\\t'+f13+'\\t'+kap3+'\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 10 | TMV: ['agenthood', 'concreteness'] | POS: ['prp', 'vbd', 'jj', '-rrb-', 'nn', 'in', 'vbz', '-lrb-']\n",
      "POS Train: (12348, 8) | POS Test: (416, 8) | POS feature-columns: ['in', 'jj', 'lrb', 'nn', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 10) 12348 | (416, 10)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.5600813913060586\n",
      "NEG 0.6664103390552345\n",
      "NEG 0.607285379361261\n",
      "NEG 0.5294091973755521\n",
      "NEG 0.6247361983730341\n",
      "NEG 0.6343376867062029\n",
      "NEG 0.6219575928075902\n",
      "NEG 0.6741465482315042\n",
      "NEG 0.5155857986330168\n",
      "NEG 0.6149742001972118\n",
      "NEG 0.5418262347133224\n",
      "NEG 0.662933141924044\n",
      "NEG 0.6740764728168182\n",
      "NEG 0.6676604296453328\n",
      "NEG 0.5298856829663474\n",
      "NEG 0.6193182104131488\n",
      "NEG 0.5723494602927631\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4136059326305057\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.009454709074449074, pvalue=0.9237395057800246)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.34528200977225354, pvalue=0.0003791872075111718)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.1951921464744558, pvalue=0.05163744488395922)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.3218615606149813, pvalue=0.0006427969679626618)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6256 | Kappa = 0.2705 | Precision = 0.5962 | Recall = 0.658 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5199 | Kappa = 0.2533 | Precision = 0.3991 | Recall = 0.7456 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 11 | TMV: ['eventfulness', 'agenthood', 'concreteness'] | POS: ['prp', 'vbd', 'jj', '-rrb-', 'nn', 'in', 'vbz', '-lrb-']\n",
      "POS Train: (12348, 8) | POS Test: (416, 8) | POS feature-columns: ['in', 'jj', 'lrb', 'nn', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 11) 12348 | (416, 11)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.5628049541321515\n",
      "NEG 0.5552837206621627\n",
      "NEG 0.6484687712788676\n",
      "NEG 0.628938424159365\n",
      "NEG 0.5397509873250457\n",
      "NEG 0.5647042921876275\n",
      "NEG 0.6629799836546544\n",
      "NEG 0.5610722220886101\n",
      "NEG 0.5378882006304809\n",
      "NEG 0.5158945514098413\n",
      "NEG 0.5271589010863516\n",
      "NEG 0.6346560783905053\n",
      "NEG 0.5418627200190175\n",
      "NEG 0.5073678179504323\n",
      "NEG 0.6349302739917\n",
      "NEG 0.5341155084026933\n",
      "NEG 0.6308022810481477\n",
      "NEG 0.6379740900919404\n",
      "NEG 0.5106365266243806\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4157398257371488\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=-0.015164876211568347, pvalue=0.8779705416392517)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.3464233275706441, pvalue=0.0003615156781395101)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.20270992933310222, pvalue=0.04310775693602932)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.3294085249189038, pvalue=0.00046941974524202656)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6456 | Kappa = 0.3007 | Precision = 0.6073 | Recall = 0.6891 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5285 | Kappa = 0.2628 | Precision = 0.4018 | Recall = 0.7719 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 12 | TMV: ['eventfulness', 'agenthood', 'concreteness'] | POS: ['prp', 'vbd', 'jj', '-rrb-', 'nn', 'in', 'dt', 'vbz', '-lrb-']\n",
      "POS Train: (12348, 9) | POS Test: (416, 9) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 12) 12348 | (416, 12)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.5605119603944271\n",
      "NEG 0.6690769102496424\n",
      "NEG 0.5367889157156194\n",
      "NEG 0.6021000307454307\n",
      "NEG 0.669640705421875\n",
      "NEG 0.5153540179286656\n",
      "NEG 0.6963706204160984\n",
      "NEG 0.6618152354594266\n",
      "NEG 0.7111111767713186\n",
      "NEG 0.6786388078779846\n",
      "NEG 0.6364263548000365\n",
      "NEG 0.6589911886050986\n",
      "NEG 0.558403765119573\n",
      "NEG 0.6945388792526351\n",
      "NEG 0.5949435182716639\n",
      "NEG 0.6485147389042335\n",
      "NEG 0.6960257057019751\n",
      "NEG 0.6956691436578598\n",
      "NEG 0.651915528366737\n",
      "NEG 0.5881738079608797\n",
      "NEG 0.5680504233154859\n",
      "NEG 0.5709553980585318\n",
      "NEG 0.6411325866558215\n",
      "NEG 0.6506488177127638\n",
      "NEG 0.719370022432659\n",
      "NEG 0.5584790133819789\n",
      "NEG 0.7163453086267065\n",
      "NEG 0.5402151346923023\n",
      "NEG 0.5765499118703901\n",
      "NEG 0.6485115645991698\n",
      "NEG 0.6479883693563516\n",
      "NEG 0.6530264476595385\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.41662323288630904\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=-0.05421287655250476, pvalue=0.5828194418805749)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.35065263567000915, pvalue=0.0003024372955476726)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.20159307021831924, pvalue=0.044295364388856055)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.30609503838447577, pvalue=0.0012081584522240511)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6103 | Kappa = 0.2664 | Precision = 0.6041 | Recall = 0.6166 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.508 | Kappa = 0.2464 | Precision = 0.401 | Recall = 0.693 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 13 | TMV: ['eventfulness', 'agenthood', 'concreteness'] | POS: ['prp', 'vbd', 'jj', '-rrb-', 'nn', 'in', 'dt', 'vbz', 'nns', '-lrb-']\n",
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 13) 12348 | (416, 13)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.6396155586439958\n",
      "NEG 0.6944056275773128\n",
      "NEG 0.6159295484824675\n",
      "NEG 0.692022487013184\n",
      "NEG 0.616640116127139\n",
      "NEG 0.6062492493531354\n",
      "NEG 0.6906351671834605\n",
      "NEG 0.7490964944459335\n",
      "NEG 0.6145430675374838\n",
      "NEG 0.647207769846895\n",
      "NEG 0.6930694872823736\n",
      "NEG 0.6334007414861164\n",
      "NEG 0.591273119121404\n",
      "NEG 0.5572730269985032\n",
      "NEG 0.5120878442196631\n",
      "NEG 0.5155986247225355\n",
      "NEG 0.580634704098404\n",
      "NEG 0.5226844736695997\n",
      "NEG 0.76333055050643\n",
      "NEG 0.7315128289552725\n",
      "NEG 0.6384208963873967\n",
      "NEG 0.6483706959479221\n",
      "NEG 0.6297845485967317\n",
      "NEG 0.7072114726607684\n",
      "NEG 0.6793769004788016\n",
      "NEG 0.7229194567099141\n",
      "NEG 0.5991783850562549\n",
      "NEG 0.5548068172651452\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.40280804568575523\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=-0.026865273179180586, pvalue=0.785590559741455)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.3216433328755987, pvalue=0.0009791164022494492)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.21745126872445755, pvalue=0.029761528520459348)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.31352139969399007, pvalue=0.0009013534569493688)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.5953 | Kappa = 0.2501 | Precision = 0.6 | Recall = 0.5907 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.4934 | Kappa = 0.2295 | Precision = 0.3947 | Recall = 0.6579 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 14 | TMV: ['eventfulness', 'agenthood', 'concreteness', 'setting'] | POS: ['prp', 'vbd', 'jj', '-rrb-', 'nn', 'in', 'dt', 'vbz', 'nns', '-lrb-']\n",
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 14) 12348 | (416, 14)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.6768877978250251\n",
      "NEG 0.7256072197369697\n",
      "NEG 0.6559504588373454\n",
      "NEG 0.7363918597928252\n",
      "NEG 0.5984078578170469\n",
      "NEG 0.5291058681203266\n",
      "NEG 0.6466642733000921\n",
      "NEG 0.7076783524852321\n",
      "NEG 0.5232028324642299\n",
      "NEG 0.6656538314816809\n",
      "NEG 0.5358922743938992\n",
      "NEG 0.652459748778748\n",
      "NEG 0.7343283573256705\n",
      "NEG 0.6456708577131078\n",
      "NEG 0.6513207280849174\n",
      "NEG 0.6082366414569298\n",
      "NEG 0.5604803937258722\n",
      "NEG 0.5838972769054589\n",
      "NEG 0.5148438720141757\n",
      "NEG 0.65878691575359\n",
      "NEG 0.5155123834838836\n",
      "NEG 0.56531647973664\n",
      "NEG 0.5068730036730074\n",
      "NEG 0.7294523757287981\n",
      "NEG 0.693511842237404\n",
      "NEG 0.6797287155527268\n",
      "NEG 0.7156580233696032\n",
      "NEG 0.7568014114995665\n",
      "NEG 0.740576764350648\n",
      "NEG 0.6628903810776272\n",
      "NEG 0.604704077412856\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.406580223238345\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=-0.029925217421596907, pvalue=0.7618597940159694)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.32821371924787934, pvalue=0.0007578593416746214)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.2139866036210718, pvalue=0.032533703595275805)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.318025457677462, pvalue=0.0007518429637461077)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.5959 | Kappa = 0.2461 | Precision = 0.5959 | Recall = 0.5959 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.4951 | Kappa = 0.2297 | Precision = 0.3938 | Recall = 0.6667 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 15 | TMV: ['eventfulness', 'setting', 'agenthood', 'concreteness', 'agency'] | POS: ['prp', 'vbd', 'jj', '-rrb-', 'nn', 'in', 'dt', 'vbz', 'nns', '-lrb-']\n",
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 15) 12348 | (416, 15)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.7190291209258661\n",
      "NEG 0.6408418349817847\n",
      "NEG 0.5507616661430556\n",
      "NEG 0.6185980752811889\n",
      "NEG 0.5323251164041364\n",
      "NEG 0.5709698178866066\n",
      "NEG 0.5694562249071882\n",
      "NEG 0.6670267080298845\n",
      "NEG 0.5617490162216531\n",
      "NEG 0.6771902058913895\n",
      "NEG 0.7301761236617028\n",
      "NEG 0.5455807438684678\n",
      "NEG 0.56473044594122\n",
      "NEG 0.7197175350391825\n",
      "NEG 0.5368469198321313\n",
      "NEG 0.6156547346743596\n",
      "NEG 0.6233652472584549\n",
      "NEG 0.7220128956407292\n",
      "NEG 0.5672404970069929\n",
      "NEG 0.6569369058549895\n",
      "NEG 0.7160380599947387\n",
      "NEG 0.5815224736739275\n",
      "NEG 0.6658616416433849\n",
      "NEG 0.7230581847372561\n",
      "NEG 0.5394019035777861\n",
      "NEG 0.5300923856458312\n",
      "NEG 0.5273845393564995\n",
      "NEG 0.7041185669020655\n",
      "NEG 0.7273477299341315\n",
      "NEG 0.6539009285657126\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4054327370590307\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=-0.007390543297361454, pvalue=0.9403539379073022)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.3289946954058248, pvalue=0.0007348532578129587)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.21450664674089895, pvalue=0.03210420870843681)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.29961958530010213, pvalue=0.0015502831537029583)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6078 | Kappa = 0.27 | Precision = 0.6094 | Recall = 0.6062 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5098 | Kappa = 0.2529 | Precision = 0.4062 | Recall = 0.6842 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 16 | TMV: ['eventfulness', 'setting', 'agenthood', 'concreteness', 'temporality', 'agency'] | POS: ['prp', 'vbd', 'jj', '-rrb-', 'nn', 'in', 'dt', 'vbz', 'nns', '-lrb-']\n",
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 16) 12348 | (416, 16)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.748751375161754\n",
      "NEG 0.6105798197573907\n",
      "NEG 0.5928491989307167\n",
      "NEG 0.662370505525206\n",
      "NEG 0.5630272226902185\n",
      "NEG 0.5584931333336999\n",
      "NEG 0.6246254159856655\n",
      "NEG 0.5925705207042522\n",
      "NEG 0.5052691093783809\n",
      "NEG 0.6826523981195111\n",
      "NEG 0.6067731867723315\n",
      "NEG 0.7186031445490755\n",
      "NEG 0.5459665929689568\n",
      "NEG 0.5807785981840219\n",
      "NEG 0.6284653791046276\n",
      "NEG 0.594872349958406\n",
      "NEG 0.6220120187734226\n",
      "NEG 0.6392546545025383\n",
      "NEG 0.7575786193260662\n",
      "NEG 0.6003822872702452\n",
      "NEG 0.710054451276441\n",
      "NEG 0.5211264548110068\n",
      "NEG 0.6296200150868878\n",
      "NEG 0.6303259273099006\n",
      "NEG 0.7466799596051948\n",
      "NEG 0.6011996098686866\n",
      "NEG 0.569473570044814\n",
      "NEG 0.5815463316821106\n",
      "NEG 0.7366297987275756\n",
      "NEG 0.5511484009465839\n",
      "NEG 0.5536423453507637\n",
      "NEG 0.7080350564858391\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4086937980623551\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=-0.014578819093953017, pvalue=0.8826521239030887)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.3340823589275133, pvalue=0.0005999544533637294)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.21430484842259595, pvalue=0.03227029904910776)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.30825964857872085, pvalue=0.001110138663350808)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6118 | Kappa = 0.271 | Precision = 0.6071 | Recall = 0.6166 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5097 | Kappa = 0.2497 | Precision = 0.4031 | Recall = 0.693 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 17 | TMV: ['eventfulness', 'setting', 'agenthood', 'concreteness', 'temporality', 'agency'] | POS: ['vbp', 'prp', 'vbd', 'jj', '-rrb-', 'nn', 'in', 'dt', 'vbz', 'nns', '-lrb-']\n",
      "POS Train: (12348, 11) | POS Test: (416, 11) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 17) 12348 | (416, 17)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.5915107950352212\n",
      "NEG 0.5427014625155681\n",
      "NEG 0.6856922210072386\n",
      "NEG 0.5138746605410648\n",
      "NEG 0.6256035414438273\n",
      "NEG 0.6435221998208838\n",
      "NEG 0.5151567348123416\n",
      "NEG 0.5121907687465568\n",
      "NEG 0.5755567858864034\n",
      "NEG 0.5816601561234079\n",
      "NEG 0.508803869050785\n",
      "NEG 0.6456761600447176\n",
      "NEG 0.7233874002669984\n",
      "NEG 0.7002895192455056\n",
      "NEG 0.7651768508845531\n",
      "NEG 0.5426738384801029\n",
      "NEG 0.6340608571326387\n",
      "NEG 0.5339720496555646\n",
      "NEG 0.7371735150809477\n",
      "NEG 0.7271998717688863\n",
      "NEG 0.6692849034639025\n",
      "NEG 0.6989964538369884\n",
      "NEG 0.541961333707982\n",
      "NEG 0.7192807837781943\n",
      "NEG 0.7023398189206781\n",
      "NEG 0.6394019363683229\n",
      "NEG 0.7259033128567937\n",
      "NEG 0.6723202386410042\n",
      "NEG 0.6437119744516072\n",
      "NEG 0.63521457917623\n",
      "NEG 0.7281290440869103\n",
      "NEG 0.5583456369447473\n",
      "NEG 0.6090924438816187\n",
      "NEG 0.7497444860293742\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.42321971106857564\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=-0.024832225479134485, pvalue=0.8014682102217607)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.34617617088062586, pvalue=0.0003652768970225984)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.2433131623124688, pvalue=0.014714981836642684)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.31472279106136314, pvalue=0.0008590217432797741)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6126 | Kappa = 0.2837 | Precision = 0.619 | Recall = 0.6062 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5149 | Kappa = 0.2628 | Precision = 0.4127 | Recall = 0.6842 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 18 | TMV: ['eventfulness', 'setting', 'agenthood', 'concreteness', 'temporality', 'agency'] | POS: ['vbp', 'prp', 'vbd', 'jj', '-rrb-', 'nn', 'in', 'vbn', 'dt', 'vbz', 'nns', '-lrb-']\n",
      "POS Train: (12348, 12) | POS Test: (416, 12) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 18) 12348 | (416, 18)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.7420815808810182\n",
      "NEG 0.6666059696021378\n",
      "NEG 0.7162349215348196\n",
      "NEG 0.5754703723047131\n",
      "NEG 0.6981458994252133\n",
      "NEG 0.7195097937953022\n",
      "NEG 0.6979005168612897\n",
      "NEG 0.7505084664369572\n",
      "NEG 0.523571774958051\n",
      "NEG 0.5212957248983447\n",
      "NEG 0.5833004596691359\n",
      "NEG 0.6213869191968946\n",
      "NEG 0.5925953800847704\n",
      "NEG 0.610691562172345\n",
      "NEG 0.7678530244851148\n",
      "NEG 0.5399044343239795\n",
      "NEG 0.6555593632506996\n",
      "NEG 0.7444195864819932\n",
      "NEG 0.5582703535665785\n",
      "NEG 0.6181248464485314\n",
      "NEG 0.6114135519617072\n",
      "NEG 0.5323496682243188\n",
      "NEG 0.7619293738291498\n",
      "NEG 0.7856444411275783\n",
      "NEG 0.618136748145561\n",
      "NEG 0.6736856228525789\n",
      "NEG 0.7107320550486751\n",
      "NEG 0.7040950193899558\n",
      "NEG 0.5523410463494383\n",
      "NEG 0.6973311031908017\n",
      "NEG 0.5206750107991367\n",
      "NEG 0.7005057365361905\n",
      "NEG 0.5641672820420452\n",
      "NEG 0.6070593234439021\n",
      "NEG 0.6308198494879924\n",
      "NEG 0.5582182982745456\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4231014787622701\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.02919394261112114, pvalue=0.7675119291507675)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.3194984780650093, pvalue=0.0010632037218715469)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.22250115321656747, pvalue=0.026080707669957386)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.3366771746203461, pvalue=0.00034414462295391534)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6223 | Kappa = 0.3113 | Precision = 0.6393 | Recall = 0.6062 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5118 | Kappa = 0.2628 | Precision = 0.4153 | Recall = 0.6667 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 19 | TMV: ['eventfulness', 'setting', 'agenthood', 'concreteness', 'temporality', 'agency'] | POS: ['vbp', 'prp', 'vbd', 'jj', '-rrb-', 'cc', 'nn', 'in', 'vbn', 'dt', 'vbz', 'nns', '-lrb-']\n",
      "POS Train: (12348, 13) | POS Test: (416, 13) | POS feature-columns: ['cc', 'dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 19) 12348 | (416, 19)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.6170043789395244\n",
      "NEG 0.7099305760946439\n",
      "NEG 0.7101174646236299\n",
      "NEG 0.7400553067485931\n",
      "NEG 0.5122320819470451\n",
      "NEG 0.564970379863432\n",
      "NEG 0.7232148913979888\n",
      "NEG 0.8054657461076284\n",
      "NEG 0.751305926987541\n",
      "NEG 0.5975749171470106\n",
      "NEG 0.5116621202621073\n",
      "NEG 0.6601280185292452\n",
      "NEG 0.6328857953683611\n",
      "NEG 0.7313885468225555\n",
      "NEG 0.6932853750431195\n",
      "NEG 0.8226305436763727\n",
      "NEG 0.7368084202918315\n",
      "NEG 0.8241569011122218\n",
      "NEG 0.5616174532929619\n",
      "NEG 0.7187969126896863\n",
      "NEG 0.5161310541946766\n",
      "NEG 0.6247766378255419\n",
      "NEG 0.8126003889270758\n",
      "NEG 0.7935540357148728\n",
      "NEG 0.7688647740918978\n",
      "NEG 0.6989654820458111\n",
      "NEG 0.7759672769008271\n",
      "NEG 0.7474017575308866\n",
      "NEG 0.5372774868598515\n",
      "NEG 0.7609076328134375\n",
      "NEG 0.5629881872886106\n",
      "NEG 0.6356826568400271\n",
      "NEG 0.6311146937014596\n",
      "NEG 0.7979730676425598\n",
      "NEG 0.7128696251065867\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4243512934808142\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.025428368857871694, pvalue=0.7968037688082612)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.3190890847620119, pvalue=0.0010799824406042522)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.1818618925238208, pvalue=0.07015830242139112)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.32468181525344775, pvalue=0.0005720971497362398)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.612 | Kappa = 0.3089 | Precision = 0.6474 | Recall = 0.5803 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5087 | Kappa = 0.2663 | Precision = 0.422 | Recall = 0.6404 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 20 | TMV: ['eventfulness', 'setting', 'feltness', 'agenthood', 'concreteness', 'temporality', 'agency'] | POS: ['vbp', 'prp', 'vbd', 'jj', '-rrb-', 'cc', 'nn', 'in', 'vbn', 'dt', 'vbz', 'nns', '-lrb-']\n",
      "POS Train: (12348, 13) | POS Test: (416, 13) | POS feature-columns: ['cc', 'dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 20) 12348 | (416, 20)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.6198436955855255\n",
      "NEG 0.6917302817461689\n",
      "NEG 0.7382529528713025\n",
      "NEG 0.7746912130853929\n",
      "NEG 0.5344001366343244\n",
      "NEG 0.5487788713490934\n",
      "NEG 0.567793713780169\n",
      "NEG 0.7250191551889872\n",
      "NEG 0.8096266426442087\n",
      "NEG 0.7676274054591372\n",
      "NEG 0.6170191343032853\n",
      "NEG 0.5455894626332515\n",
      "NEG 0.6937584160373997\n",
      "NEG 0.662443231162003\n",
      "NEG 0.7495797687204804\n",
      "NEG 0.7122687670303095\n",
      "NEG 0.737873872112544\n",
      "NEG 0.6200018879355832\n",
      "NEG 0.5274071898421264\n",
      "NEG 0.7596333670090534\n",
      "NEG 0.5477182450260856\n",
      "NEG 0.6783272653554071\n",
      "NEG 0.8066922617373149\n",
      "NEG 0.7822474296269555\n",
      "NEG 0.7430045109994254\n",
      "NEG 0.7947333447385067\n",
      "NEG 0.7665989363296462\n",
      "NEG 0.5654100227119325\n",
      "NEG 0.7887442956688657\n",
      "NEG 0.5739484923583191\n",
      "NEG 0.5147545899397492\n",
      "NEG 0.686827239901748\n",
      "NEG 0.6781701898765147\n",
      "NEG 0.8065965406925085\n",
      "NEG 0.7455614769178405\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4274077556284595\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.023223971761574813, pvalue=0.8140861563818769)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.32747045403728814, pvalue=0.0007803643552823666)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.18485219273436862, pvalue=0.0655947661379406)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.33370848552722354, pvalue=0.00039102515615824945)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6125 | Kappa = 0.3048 | Precision = 0.642 | Recall = 0.5855 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5103 | Kappa = 0.2663 | Precision = 0.4205 | Recall = 0.6491 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 21 | TMV: ['eventfulness', 'setting', 'feltness', 'agenthood', 'concreteness', 'temporality', 'agency'] | POS: ['vbp', 'prp', 'vbd', 'jj', '-rrb-', 'cc', 'vbz', 'nn', 'in', 'vbn', 'dt', 'cd', 'nns', '-lrb-']\n",
      "POS Train: (12348, 14) | POS Test: (416, 14) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 21) 12348 | (416, 21)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.7762833512569474\n",
      "NEG 0.8119496415383535\n",
      "NEG 0.7615889217958307\n",
      "NEG 0.601997908138658\n",
      "NEG 0.7760400452783809\n",
      "NEG 0.694306933356908\n",
      "NEG 0.5718917241307278\n",
      "NEG 0.730088843736745\n",
      "NEG 0.594530140292397\n",
      "NEG 0.7486400504795442\n",
      "NEG 0.7222443816627856\n",
      "NEG 0.7385853859756524\n",
      "NEG 0.7899043454518694\n",
      "NEG 0.7250521422918085\n",
      "NEG 0.533129180692758\n",
      "NEG 0.6695304035279709\n",
      "NEG 0.7527046715922073\n",
      "NEG 0.7883084068052998\n",
      "NEG 0.5942265358877447\n",
      "NEG 0.6695395590461171\n",
      "NEG 0.7649229592782818\n",
      "NEG 0.5126267923206685\n",
      "NEG 0.7925587319619241\n",
      "NEG 0.649565972931414\n",
      "NEG 0.6523827126113728\n",
      "NEG 0.8204081177934567\n",
      "NEG 0.6609731491543679\n",
      "NEG 0.564387575844339\n",
      "NEG 0.6573209577645323\n",
      "NEG 0.7551380859568391\n",
      "NEG 0.5142814177724825\n",
      "NEG 0.5389716440878601\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4189814905898553\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.033910089056494855, pvalue=0.7312867552091176)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.28108713021543513, pvalue=0.004212564119894812)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.18924156914784349, pvalue=0.05933535540281145)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.36148892143145944, pvalue=0.00011246759110520168)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6146 | Kappa = 0.3053 | Precision = 0.6404 | Recall = 0.5907 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5137 | Kappa = 0.2697 | Precision = 0.4213 | Recall = 0.6579 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 22 | TMV: ['eventfulness', 'setting', 'feltness', 'agenthood', 'concreteness', 'temporality', 'agency'] | POS: ['vbp', 'prp', 'vbd', 'jj', '-rrb-', 'cc', 'vbz', 'nn', 'in', 'vbn', 'nnp', 'dt', 'cd', 'nns', '-lrb-']\n",
      "POS Train: (12348, 15) | POS Test: (416, 15) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 22) 12348 | (416, 22)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.5477126438255986\n",
      "NEG 0.5374809416241138\n",
      "NEG 0.7501495026610284\n",
      "NEG 0.6741513470719082\n",
      "NEG 0.8041265776722702\n",
      "NEG 0.7616926782420762\n",
      "NEG 0.6913304873274017\n",
      "NEG 0.8348244803921994\n",
      "NEG 0.7570327701224492\n",
      "NEG 0.6261831958921269\n",
      "NEG 0.7531625754186941\n",
      "NEG 0.7833554334713676\n",
      "NEG 0.6447598930550126\n",
      "NEG 0.8103510683086214\n",
      "NEG 0.566227968394964\n",
      "NEG 0.806995226763067\n",
      "NEG 0.5164216378668328\n",
      "NEG 0.64747204959607\n",
      "NEG 0.6735156920645153\n",
      "NEG 0.710109669093873\n",
      "NEG 0.6176002395451741\n",
      "NEG 0.6942695775220508\n",
      "NEG 0.6936422861034659\n",
      "NEG 0.663059714958614\n",
      "NEG 0.7729141278256957\n",
      "NEG 0.8479281178947156\n",
      "NEG 0.5803989108662022\n",
      "NEG 0.6685458560459312\n",
      "NEG 0.8289247623987014\n",
      "NEG 0.8552387427014543\n",
      "NEG 0.7992733503017244\n",
      "NEG 0.7974485399297341\n",
      "NEG 0.8551318272863963\n",
      "NEG 0.6955777716055689\n",
      "NEG 0.8252793092296421\n",
      "NEG 0.643454830261717\n",
      "NEG 0.6644162206747452\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.3928032398543895\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.02795485910496763, pvalue=0.777116721148084)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.28921666226400655, pvalue=0.0031960374523041916)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.1641922696269947, pvalue=0.102599769449531)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.24269497193947712, pvalue=0.010998890041241813)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.5899 | Kappa = 0.287 | Precision = 0.6442 | Recall = 0.544 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.4982 | Kappa = 0.2593 | Precision = 0.4233 | Recall = 0.6053 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 23 | TMV: ['eventfulness', 'coh_seq', 'setting', 'feltness', 'agenthood', 'concreteness', 'temporality', 'agency'] | POS: ['vbp', 'prp', 'vbd', 'jj', '-rrb-', 'cc', 'vbz', 'nn', 'in', 'vbn', 'nnp', 'dt', 'cd', 'nns', '-lrb-']\n",
      "POS Train: (12348, 15) | POS Test: (416, 15) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 23) 12348 | (416, 23)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.5142930899341213\n",
      "NEG 0.54090573057659\n",
      "NEG 0.572811788011413\n",
      "NEG 0.7913457824229893\n",
      "NEG 0.6496372006800966\n",
      "NEG 0.7903724222337273\n",
      "NEG 0.784610885197277\n",
      "NEG 0.5124640851448165\n",
      "NEG 0.5065641012358396\n",
      "NEG 0.7149645292087771\n",
      "NEG 0.7755349122742359\n",
      "NEG 0.5104873725829394\n",
      "NEG 0.6419305273461015\n",
      "NEG 0.7809349256381152\n",
      "NEG 0.7996955713928717\n",
      "NEG 0.6815193023761527\n",
      "NEG 0.8272063280620217\n",
      "NEG 0.577335594290923\n",
      "NEG 0.8415499392930211\n",
      "NEG 0.5304017304484656\n",
      "NEG 0.540755538112864\n",
      "NEG 0.6693337162685419\n",
      "NEG 0.7274323127469576\n",
      "NEG 0.6953622739243921\n",
      "NEG 0.648282024143986\n",
      "NEG 0.7244020597202835\n",
      "NEG 0.7101663914330575\n",
      "NEG 0.6853019820221513\n",
      "NEG 0.7924079957762628\n",
      "NEG 0.6154333477001799\n",
      "NEG 0.6938191935319532\n",
      "NEG 0.8537200034836712\n",
      "NEG 0.8357471418892649\n",
      "NEG 0.8353886298780394\n",
      "NEG 0.7302226736948964\n",
      "NEG 0.660600441382691\n",
      "NEG 0.5275230269952218\n",
      "NEG 0.6912382255996681\n",
      "NEG 0.5235276060596472\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.3974397346069255\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.03204396380630546, pvalue=0.7455551835639289)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.3018651162344606, pvalue=0.0020466400533070213)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.16536318436431421, pvalue=0.10013860601759163)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.24924928082788675, pvalue=0.008958754617208783)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.5928 | Kappa = 0.2833 | Precision = 0.6369 | Recall = 0.5544 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.4965 | Kappa = 0.2523 | Precision = 0.4167 | Recall = 0.614 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 24 | TMV: ['eventfulness', 'coh_seq', 'setting', 'feltness', 'agenthood', 'concreteness', 'temporality', 'agency'] | POS: ['vbp', 'prp', 'wdt', 'vbd', 'jj', '-rrb-', 'cc', 'vbz', 'nn', 'in', 'vbn', 'nnp', 'dt', 'cd', 'nns', '-lrb-']\n",
      "POS Train: (12348, 16) | POS Test: (416, 16) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 24) 12348 | (416, 24)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.807652022119318\n",
      "NEG 0.5470053012212414\n",
      "NEG 0.5297259566058761\n",
      "NEG 0.5990218949830163\n",
      "NEG 0.5903467165576796\n",
      "NEG 0.8480277493105004\n",
      "NEG 0.6655889369655457\n",
      "NEG 0.6817542896266224\n",
      "NEG 0.5235151426600013\n",
      "NEG 0.7988928686773445\n",
      "NEG 0.7306761727846097\n",
      "NEG 0.5601038258880606\n",
      "NEG 0.5471210144055084\n",
      "NEG 0.7866423424331817\n",
      "NEG 0.6092396932696875\n",
      "NEG 0.552701764684094\n",
      "NEG 0.7427147346603113\n",
      "NEG 0.5326133769545874\n",
      "NEG 0.8384648490023927\n",
      "NEG 0.738971620118002\n",
      "NEG 0.7297154300900347\n",
      "NEG 0.5490627170512575\n",
      "NEG 0.5386739950908067\n",
      "NEG 0.7587924550505868\n",
      "NEG 0.8520477644898303\n",
      "NEG 0.6050618977340756\n",
      "NEG 0.5921502272282025\n",
      "NEG 0.6966872160689184\n",
      "NEG 0.7182564298866255\n",
      "NEG 0.7981254542950877\n",
      "NEG 0.5092096985165672\n",
      "NEG 0.7501780622577582\n",
      "NEG 0.6280116439718132\n",
      "NEG 0.7997519909160916\n",
      "NEG 0.7377960449233902\n",
      "NEG 0.8356991194025051\n",
      "NEG 0.7344927804367856\n",
      "NEG 0.7260719795864851\n",
      "NEG 0.5101492585102857\n",
      "NEG 0.6032561676814222\n",
      "NEG 0.819630821473021\n",
      "NEG 0.5811688482673754\n",
      "NEG 0.7367705056508779\n",
      "NEG 0.5744501041383531\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.40940535042581955\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.02914811042087531, pvalue=0.7678665827254342)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.3078391206277288, pvalue=0.0016466745810115586)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.19031610145116384, pvalue=0.05787982453208603)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.26150440049043333, pvalue=0.006020232482986311)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6145 | Kappa = 0.3265 | Precision = 0.6667 | Recall = 0.5699 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5233 | Kappa = 0.2947 | Precision = 0.4424 | Recall = 0.6404 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 25 | TMV: ['eventfulness', 'coh_seq', 'setting', 'feltness', 'agenthood', 'concreteness', 'temporality', 'agency'] | POS: ['vbp', 'prp', 'wdt', 'vbd', 'jj', 'to', 'cc', '-rrb-', 'vbz', 'nn', 'in', 'vbn', 'nnp', 'dt', 'cd', 'nns', '-lrb-']\n",
      "POS Train: (12348, 17) | POS Test: (416, 17) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 25) 12348 | (416, 25)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.6048302572854007\n",
      "NEG 0.5699937455294222\n",
      "NEG 0.6267594992493504\n",
      "NEG 0.508571362542607\n",
      "NEG 0.7360079881396029\n",
      "NEG 0.7325159317161906\n",
      "NEG 0.7057205699809596\n",
      "NEG 0.5529788370992998\n",
      "NEG 0.5915225766600163\n",
      "NEG 0.7571718537065812\n",
      "NEG 0.8777598215211215\n",
      "NEG 0.6969094103762179\n",
      "NEG 0.605083137806419\n",
      "NEG 0.7433880269282469\n",
      "NEG 0.5336705877184272\n",
      "NEG 0.576977233445128\n",
      "NEG 0.7949757417762577\n",
      "NEG 0.7634166463622369\n",
      "NEG 0.8207717362392463\n",
      "NEG 0.6035349323900225\n",
      "NEG 0.6997123104198057\n",
      "NEG 0.8267231714037108\n",
      "NEG 0.8616152190786713\n",
      "NEG 0.6127648104035246\n",
      "NEG 0.5093783997653772\n",
      "NEG 0.8234000227606614\n",
      "NEG 0.5511938382815197\n",
      "NEG 0.7326562453082995\n",
      "NEG 0.6312474004461651\n",
      "NEG 0.5557275601264625\n",
      "NEG 0.791180667774829\n",
      "NEG 0.6665902955681475\n",
      "NEG 0.8637163115894297\n",
      "NEG 0.7588350335849925\n",
      "NEG 0.8603086416116184\n",
      "NEG 0.702456860023944\n",
      "NEG 0.7375850271459411\n",
      "NEG 0.5371198125944255\n",
      "NEG 0.7374047197869142\n",
      "NEG 0.7728497630649936\n",
      "NEG 0.7855847109027507\n",
      "NEG 0.7714441589703833\n",
      "NEG 0.5658731885977146\n",
      "NEG 0.7571102748969063\n",
      "NEG 0.6504194226718938\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4024365452570619\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.017333160650508706, pvalue=0.860687466792028)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.30175534354022426, pvalue=0.0020547475311377787)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.17568839314014809, pvalue=0.08039092880811996)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.2591711964063073, pvalue=0.006502693574999044)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6162 | Kappa = 0.3311 | Precision = 0.6707 | Recall = 0.5699 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5252 | Kappa = 0.2983 | Precision = 0.4451 | Recall = 0.6404 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 26 | TMV: ['eventfulness', 'coh_seq', 'setting', 'feltness', 'agenthood', 'concreteness', 'temporality', 'agency'] | POS: ['vbp', 'prp', 'wdt', 'cd', 'vbd', 'jj', 'to', 'cc', '-rrb-', 'vbz', 'nn', 'in', 'vbn', 'nnp', 'dt', 'md', 'nns', '-lrb-']\n",
      "POS Train: (12348, 18) | POS Test: (416, 18) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 26) 12348 | (416, 26)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.600937640847002\n",
      "NEG 0.6488806417942901\n",
      "NEG 0.5617792405930283\n",
      "NEG 0.5539869242329795\n",
      "NEG 0.7517477238999936\n",
      "NEG 0.7118664657781615\n",
      "NEG 0.5627945228695291\n",
      "NEG 0.7920327766671454\n",
      "NEG 0.6756121687221788\n",
      "NEG 0.6494248774756203\n",
      "NEG 0.6673078999283413\n",
      "NEG 0.5646401760968585\n",
      "NEG 0.584439112500568\n",
      "NEG 0.8255713448493457\n",
      "NEG 0.6778231658329272\n",
      "NEG 0.6275510748620542\n",
      "NEG 0.7324312827936178\n",
      "NEG 0.6986238742295726\n",
      "NEG 0.8184038289127182\n",
      "NEG 0.7880002970315141\n",
      "NEG 0.5220593367594911\n",
      "NEG 0.6767151815241921\n",
      "NEG 0.5708434072754202\n",
      "NEG 0.8392617186954372\n",
      "NEG 0.7424982473100014\n",
      "NEG 0.6295278312681652\n",
      "NEG 0.6776096351372823\n",
      "NEG 0.5699690475499245\n",
      "NEG 0.7965145962595265\n",
      "NEG 0.6868845077465258\n",
      "NEG 0.5110819745984283\n",
      "NEG 0.6083943692908749\n",
      "NEG 0.8918368535080643\n",
      "NEG 0.7593624052167524\n",
      "NEG 0.7259072670258782\n",
      "NEG 0.6119016284442033\n",
      "NEG 0.5199993873854686\n",
      "NEG 0.7847302981078956\n",
      "NEG 0.5242111709846726\n",
      "NEG 0.5805565951045036\n",
      "NEG 0.7635135226724569\n",
      "NEG 0.7736635359758447\n",
      "NEG 0.8894426565524234\n",
      "NEG 0.8177442326166593\n",
      "NEG 0.8013536529256551\n",
      "NEG 0.6450355738614196\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4131472168934783\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.04116268798264544, pvalue=0.6767370945610427)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.3056763932704872, pvalue=0.0017824632197221241)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.1939110000077347, pvalue=0.053221239370093265)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.2920911753030117, pvalue=0.002057079812834695)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.5994 | Kappa = 0.3018 | Precision = 0.6524 | Recall = 0.5544 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.518 | Kappa = 0.2877 | Precision = 0.439 | Recall = 0.6316 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 27 | TMV: ['eventfulness', 'coh_seq', 'setting', 'feltness', 'agenthood', 'concreteness', 'temporality', 'agency'] | POS: ['prp', 'vbn', '-lrb-', 'vbp', 'cc', 'rp', 'nn', 'nnp', 'md', 'wdt', 'jj', 'to', 'in', 'dt', 'cd', 'vbd', 'vbz', 'nns', '-rrb-']\n",
      "POS Train: (12348, 19) | POS Test: (416, 19) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 27) 12348 | (416, 27)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.6212988265271424\n",
      "NEG 0.6980808416458387\n",
      "NEG 0.5389485992209928\n",
      "NEG 0.5970930588924107\n",
      "NEG 0.7041429270983368\n",
      "NEG 0.5103179756712138\n",
      "NEG 0.6900390068500862\n",
      "NEG 0.5103901752014054\n",
      "NEG 0.8234420122265863\n",
      "NEG 0.7069149811487362\n",
      "NEG 0.679616686724226\n",
      "NEG 0.7016970552519319\n",
      "NEG 0.5567151046585922\n",
      "NEG 0.5330811102715474\n",
      "NEG 0.8487072303650205\n",
      "NEG 0.7360637764302563\n",
      "NEG 0.6475483769495317\n",
      "NEG 0.719648398561253\n",
      "NEG 0.6672925806401563\n",
      "NEG 0.7933371686572168\n",
      "NEG 0.7834967902111198\n",
      "NEG 0.5708082857506046\n",
      "NEG 0.6673231523384188\n",
      "NEG 0.596877139556502\n",
      "NEG 0.8646045800657434\n",
      "NEG 0.8803945589398402\n",
      "NEG 0.6648121024803549\n",
      "NEG 0.672166243165498\n",
      "NEG 0.5906111711886299\n",
      "NEG 0.7752556914228871\n",
      "NEG 0.6949795413650811\n",
      "NEG 0.5385885924910534\n",
      "NEG 0.6383143379343447\n",
      "NEG 0.5107940567251373\n",
      "NEG 0.7635527174063934\n",
      "NEG 0.750496141403451\n",
      "NEG 0.6095567820969973\n",
      "NEG 0.5535276621708652\n",
      "NEG 0.8093383001075721\n",
      "NEG 0.5101425311289712\n",
      "NEG 0.5396992832398709\n",
      "NEG 0.6246438939016885\n",
      "NEG 0.7877460218182105\n",
      "NEG 0.8164229901812793\n",
      "NEG 0.8939376903810016\n",
      "NEG 0.8263439718371665\n",
      "NEG 0.5275967647628944\n",
      "NEG 0.5317399629063544\n",
      "NEG 0.8423063998513057\n",
      "NEG 0.6832138141211446\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4145570004835329\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.046976214374352364, pvalue=0.6341715105808442)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.3020204780268841, pvalue=0.0020352149047827934)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.1993388055013391, pvalue=0.04677532127634686)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.29498564716880615, pvalue=0.0018467493431940799)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6089 | Kappa = 0.3167 | Precision = 0.6606 | Recall = 0.5648 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5161 | Kappa = 0.2841 | Precision = 0.4364 | Recall = 0.6316 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 28 | TMV: ['eventfulness', 'coh_seq', 'setting', 'temporal_order', 'feltness', 'agenthood', 'concreteness', 'temporality', 'agency'] | POS: ['prp', 'vbn', '-lrb-', 'vbp', 'cc', 'rp', 'nn', 'nnp', 'md', 'wdt', 'jj', 'to', 'in', 'dt', 'cd', 'vbd', 'vbz', 'nns', '-rrb-']\n",
      "POS Train: (12348, 19) | POS Test: (416, 19) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 28) 12348 | (416, 28)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.6492473039567305\n",
      "NEG 0.7340539843484414\n",
      "NEG 0.5417703460333088\n",
      "NEG 0.640932674184812\n",
      "NEG 0.7003558451160583\n",
      "NEG 0.5259633498152231\n",
      "NEG 0.6823286813723252\n",
      "NEG 0.8451733323036343\n",
      "NEG 0.7367590867911966\n",
      "NEG 0.6929431856888024\n",
      "NEG 0.7282260978497238\n",
      "NEG 0.5704389634242075\n",
      "NEG 0.5664075694810707\n",
      "NEG 0.5051843191582954\n",
      "NEG 0.867702967988275\n",
      "NEG 0.7642759155847377\n",
      "NEG 0.6677310425876666\n",
      "NEG 0.7325800153078506\n",
      "NEG 0.6724335645668902\n",
      "NEG 0.7945653835470491\n",
      "NEG 0.8077641018693221\n",
      "NEG 0.6084971559551912\n",
      "NEG 0.7163978725280384\n",
      "NEG 0.6287621124304867\n",
      "NEG 0.881789241225157\n",
      "NEG 0.8743921160597626\n",
      "NEG 0.6921014692380701\n",
      "NEG 0.7200803088289293\n",
      "NEG 0.6267154438827383\n",
      "NEG 0.7741818389971996\n",
      "NEG 0.7169662586225655\n",
      "NEG 0.5620705354820216\n",
      "NEG 0.6586716248398303\n",
      "NEG 0.5506509771747096\n",
      "NEG 0.7747642063239033\n",
      "NEG 0.75848319284339\n",
      "NEG 0.6076927102228904\n",
      "NEG 0.5798966125110037\n",
      "NEG 0.8226101712024636\n",
      "NEG 0.5301471014003227\n",
      "NEG 0.5594446622989142\n",
      "NEG 0.6576605084780398\n",
      "NEG 0.8101517226021318\n",
      "NEG 0.8550784866240195\n",
      "NEG 0.8456774540289759\n",
      "NEG 0.5536178374410561\n",
      "NEG 0.5840405471163982\n",
      "NEG 0.8680235140925068\n",
      "NEG 0.7201495400086825\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.419856347005343\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.04865393631629352, pvalue=0.6220983566445326)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.31800237376672186, pvalue=0.001125697062347796)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.202750002221174, pvalue=0.043065642851335385)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.29820481940416094, pvalue=0.0016358549466324313)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6072 | Kappa = 0.3121 | Precision = 0.6566 | Recall = 0.5648 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5143 | Kappa = 0.2805 | Precision = 0.4337 | Recall = 0.6316 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 29 | TMV: ['eventfulness', 'coh_seq', 'setting', 'temporal_order', 'feltness', 'agenthood', 'concreteness', 'temporality', 'agency'] | POS: ['prp', 'vbn', '-lrb-', 'vbp', 'cc', 'rp', 'nn', 'nnp', 'md', 'vb', 'wdt', 'jj', 'to', 'in', 'dt', 'cd', 'vbd', 'vbz', 'nns', '-rrb-']\n",
      "POS Train: (12348, 20) | POS Test: (416, 20) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 29) 12348 | (416, 29)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.5159398056972075\n",
      "NEG 0.6479892796854629\n",
      "NEG 0.7431299445280554\n",
      "NEG 0.5734846235679907\n",
      "NEG 0.6160652503443549\n",
      "NEG 0.8216154758966299\n",
      "NEG 0.5289489017866366\n",
      "NEG 0.8518984373843378\n",
      "NEG 0.6299548579781954\n",
      "NEG 0.8536009356365903\n",
      "NEG 0.8009930706779544\n",
      "NEG 0.6580213058927549\n",
      "NEG 0.7362486775482964\n",
      "NEG 0.5826444165266439\n",
      "NEG 0.5415163039034754\n",
      "NEG 0.5074454704302566\n",
      "NEG 0.8813582065691126\n",
      "NEG 0.6126896198244125\n",
      "NEG 0.5507241723503008\n",
      "NEG 0.7680018730392634\n",
      "NEG 0.721275137399275\n",
      "NEG 0.7329533334264693\n",
      "NEG 0.8363589869950923\n",
      "NEG 0.5679043032342035\n",
      "NEG 0.7072328934545551\n",
      "NEG 0.5891795361075112\n",
      "NEG 0.8679312368766633\n",
      "NEG 0.6941909718901247\n",
      "NEG 0.7135757329570038\n",
      "NEG 0.7112755314284185\n",
      "NEG 0.6537981774123056\n",
      "NEG 0.6171355853859642\n",
      "NEG 0.7541064622033119\n",
      "NEG 0.6272057543453947\n",
      "NEG 0.680898271600703\n",
      "NEG 0.5926126856589665\n",
      "NEG 0.6639805682252946\n",
      "NEG 0.7878195738522671\n",
      "NEG 0.5925788953142607\n",
      "NEG 0.8218646318876205\n",
      "NEG 0.6095007398318576\n",
      "NEG 0.7131592566291977\n",
      "NEG 0.8107363217806001\n",
      "NEG 0.8355921049063758\n",
      "NEG 0.873149692774\n",
      "NEG 0.8848175471282553\n",
      "NEG 0.8541909537516864\n",
      "NEG 0.8325641378230408\n",
      "NEG 0.8258241864303083\n",
      "NEG 0.7117747658382992\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.40973537160665957\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.02871754892147876, pvalue=0.7712006350542598)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.2870063245574119, pvalue=0.003447974493832583)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.21026139516447076, pvalue=0.03575416870012983)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.3018971264637321, pvalue=0.0014210306667534372)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6067 | Kappa = 0.3163 | Precision = 0.6626 | Recall = 0.5596 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5199 | Kappa = 0.2913 | Precision = 0.4417 | Recall = 0.6316 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 30 | TMV: ['eventfulness', 'coh_seq', 'setting', 'temporal_order', 'feltness', 'agenthood', 'concreteness', 'temporality', 'saying', 'agency'] | POS: ['prp', 'vbn', '-lrb-', 'vbp', 'cc', 'rp', 'nn', 'nnp', 'md', 'vb', 'wdt', 'jj', 'to', 'in', 'dt', 'cd', 'vbd', 'vbz', 'nns', '-rrb-']\n",
      "POS Train: (12348, 20) | POS Test: (416, 20) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 30) 12348 | (416, 30)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.5477685434825388\n",
      "NEG 0.6758665534212074\n",
      "NEG 0.77654518873164\n",
      "NEG 0.5694397567017611\n",
      "NEG 0.6558530248532718\n",
      "NEG 0.8181676915996281\n",
      "NEG 0.5422335649328917\n",
      "NEG 0.861743311850988\n",
      "NEG 0.6290812323722934\n",
      "NEG 0.8493119906020864\n",
      "NEG 0.8271804325719972\n",
      "NEG 0.6899634270449676\n",
      "NEG 0.7599819619178221\n",
      "NEG 0.5302667717648849\n",
      "NEG 0.612651978614313\n",
      "NEG 0.5563385706599487\n",
      "NEG 0.5270214278848183\n",
      "NEG 0.6470234443992001\n",
      "NEG 0.5779814361751568\n",
      "NEG 0.7800802658522978\n",
      "NEG 0.7253869902338264\n",
      "NEG 0.7307213708280569\n",
      "NEG 0.8624704986789902\n",
      "NEG 0.6018306627370925\n",
      "NEG 0.7541655358685468\n",
      "NEG 0.6192669602757148\n",
      "NEG 0.8863457373412488\n",
      "NEG 0.7139382064929073\n",
      "NEG 0.7410781238141025\n",
      "NEG 0.7567893538503383\n",
      "NEG 0.6875639729766736\n",
      "NEG 0.6260781127259449\n",
      "NEG 0.7738101009294646\n",
      "NEG 0.6477370324897712\n",
      "NEG 0.7049581217051117\n",
      "NEG 0.6342308865531504\n",
      "NEG 0.6836535586766411\n",
      "NEG 0.791835542407073\n",
      "NEG 0.6173481606121854\n",
      "NEG 0.8398246610462463\n",
      "NEG 0.6306746495733597\n",
      "NEG 0.7464959620953078\n",
      "NEG 0.8115463291293147\n",
      "NEG 0.8564847795213473\n",
      "NEG 0.8708454908708169\n",
      "NEG 0.856505244660269\n",
      "NEG 0.5221793162616323\n",
      "NEG 0.8548466189765794\n",
      "NEG 0.7494976236764777\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.41542901519117437\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.0399457321183345, pvalue=0.6857835265603107)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.2980985751417185, pvalue=0.002341974940493694)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.21836196657264736, pvalue=0.029066968185560663)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.30644596088403364, pvalue=0.0011917505420298844)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6128 | Kappa = 0.3219 | Precision = 0.6627 | Recall = 0.5699 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5214 | Kappa = 0.2911 | Precision = 0.4398 | Recall = 0.6404 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 31 | TMV: ['eventfulness', 'coh_seq', 'setting', 'temporal_order', 'feltness', 'agenthood', 'concreteness', 'temporality', 'saying', 'agency'] | POS: ['prp', 'rb', 'vbn', '-lrb-', 'vbp', 'cc', 'rp', 'nn', 'nnp', 'md', 'vb', 'wdt', 'jj', 'to', 'in', 'dt', 'cd', 'vbd', 'vbz', 'nns', '-rrb-']\n",
      "POS Train: (12348, 21) | POS Test: (416, 21) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 31) 12348 | (416, 31)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.6652996110335287\n",
      "NEG 0.7044331397106601\n",
      "NEG 0.795445823417125\n",
      "NEG 0.8229432055561157\n",
      "NEG 0.5751518376723128\n",
      "NEG 0.6143809027403728\n",
      "NEG 0.5659146626915168\n",
      "NEG 0.848496037232115\n",
      "NEG 0.6646426825302102\n",
      "NEG 0.8219629069514207\n",
      "NEG 0.7831736898632824\n",
      "NEG 0.6717228157729449\n",
      "NEG 0.5296938598004748\n",
      "NEG 0.6181899611587346\n",
      "NEG 0.5288215352165998\n",
      "NEG 0.7867673466705655\n",
      "NEG 0.6667062916580633\n",
      "NEG 0.6663155084479938\n",
      "NEG 0.7831525734786048\n",
      "NEG 0.7102964004991129\n",
      "NEG 0.7156677431605188\n",
      "NEG 0.650919698232216\n",
      "NEG 0.7227608418441528\n",
      "NEG 0.6372124923688096\n",
      "NEG 0.725134471936466\n",
      "NEG 0.7302981283777498\n",
      "NEG 0.830516188352287\n",
      "NEG 0.7393746943161222\n",
      "NEG 0.7271043826578987\n",
      "NEG 0.752631520888054\n",
      "NEG 0.7346618951099663\n",
      "NEG 0.5459813216933369\n",
      "NEG 0.7226682535735031\n",
      "NEG 0.6190268817648475\n",
      "NEG 0.8895544005903971\n",
      "NEG 0.5760954772688124\n",
      "NEG 0.7749095770083648\n",
      "NEG 0.8816573054811467\n",
      "NEG 0.8061272730802912\n",
      "NEG 0.8967712084342296\n",
      "NEG 0.8894969317113444\n",
      "NEG 0.7213356969623431\n",
      "NEG 0.7936076597894612\n",
      "NEG 0.7657222255274788\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4105559236081314\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.02388162368485131, pvalue=0.8089204444013117)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.29683299882914765, pvalue=0.0024495161447505806)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.20703575326059345, pvalue=0.0387551004741302)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.3126771787331334, pvalue=0.0009322310988258391)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.5961 | Kappa = 0.2926 | Precision = 0.6446 | Recall = 0.5544 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5 | Kappa = 0.2593 | Precision = 0.4217 | Recall = 0.614 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 32 | TMV: ['eventfulness', 'coh_seq', 'setting', 'temporal_order', 'feltness', 'agenthood', 'concreteness', 'temporality', 'saying', 'agency'] | POS: ['prp', 'pos', 'rb', 'vbn', '-lrb-', 'vbp', 'cc', 'rp', 'nn', 'nnp', 'md', 'vb', 'wdt', 'jj', 'to', 'in', 'dt', 'cd', 'vbd', 'vbz', 'nns', '-rrb-']\n",
      "POS Train: (12348, 22) | POS Test: (416, 22) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 32) 12348 | (416, 32)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.6801679823598235\n",
      "NEG 0.7205777375400159\n",
      "NEG 0.8139272404267393\n",
      "NEG 0.8146491345061052\n",
      "NEG 0.5944268886466544\n",
      "NEG 0.6525997172643341\n",
      "NEG 0.8129302471382067\n",
      "NEG 0.7024863300603853\n",
      "NEG 0.8315960797197025\n",
      "NEG 0.824396179513278\n",
      "NEG 0.7001051456429218\n",
      "NEG 0.5329150003536631\n",
      "NEG 0.6540259398700362\n",
      "NEG 0.5385093021641404\n",
      "NEG 0.8869467536259233\n",
      "NEG 0.7721560601554166\n",
      "NEG 0.5962346894290225\n",
      "NEG 0.6734898180900749\n",
      "NEG 0.7648545111856597\n",
      "NEG 0.5280314172178487\n",
      "NEG 0.7414096203566136\n",
      "NEG 0.7132176952895656\n",
      "NEG 0.6856266382735018\n",
      "NEG 0.7363991446545075\n",
      "NEG 0.6591335621201572\n",
      "NEG 0.7471141823753199\n",
      "NEG 0.7630496672532489\n",
      "NEG 0.8427287057589111\n",
      "NEG 0.7407179320805051\n",
      "NEG 0.7028442929421816\n",
      "NEG 0.7493511107674742\n",
      "NEG 0.7242326936287623\n",
      "NEG 0.50633013299968\n",
      "NEG 0.88530133593719\n",
      "NEG 0.7355990864377191\n",
      "NEG 0.6321147162134925\n",
      "NEG 0.884478260080046\n",
      "NEG 0.5782563424114112\n",
      "NEG 0.8013293466682787\n",
      "NEG 0.8451130873762366\n",
      "NEG 0.8389681677593349\n",
      "NEG 0.8510190618184885\n",
      "NEG 0.7713849177368782\n",
      "NEG 0.8201222587053539\n",
      "NEG 0.7946161881837706\n",
      "NEG 0.5080319586285177\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4154374932650176\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.028747686085581595, pvalue=0.7709671315875423)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.29652172261393256, pvalue=0.002476639386115016)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.21334287430169432, pvalue=0.03307205077732736)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.3315654630881257, pvalue=0.0004284486988972521)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.5978 | Kappa = 0.2972 | Precision = 0.6485 | Recall = 0.5544 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5018 | Kappa = 0.2629 | Precision = 0.4242 | Recall = 0.614 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 33 | TMV: ['eventfulness', 'coh_seq', 'setting', 'temporal_order', 'feltness', 'agenthood', 'concreteness', 'temporality', 'saying', 'agency'] | POS: ['prp', 'pos', 'rb', 'vbn', '-lrb-', 'vbp', 'vbg', 'cc', 'rp', 'nn', 'nnp', 'md', 'vb', 'wdt', 'jj', 'to', 'in', 'dt', 'cd', 'vbd', 'vbz', 'nns', '-rrb-']\n",
      "POS Train: (12348, 23) | POS Test: (416, 23) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 33) 12348 | (416, 33)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.7025334094275866\n",
      "NEG 0.5578802197898155\n",
      "NEG 0.8442405256411561\n",
      "NEG 0.8282789288274092\n",
      "NEG 0.5246022001839955\n",
      "NEG 0.7429283878568552\n",
      "NEG 0.8125327832174983\n",
      "NEG 0.5237563826766456\n",
      "NEG 0.7464927443312609\n",
      "NEG 0.8666821828545692\n",
      "NEG 0.7229137640404301\n",
      "NEG 0.7577171518359117\n",
      "NEG 0.5904690963753608\n",
      "NEG 0.6668758414032211\n",
      "NEG 0.5281283705536011\n",
      "NEG 0.872770795593359\n",
      "NEG 0.8017130274520806\n",
      "NEG 0.6516467613177609\n",
      "NEG 0.8005458643812702\n",
      "NEG 0.7548336983794068\n",
      "NEG 0.5485515633252241\n",
      "NEG 0.7149836714736598\n",
      "NEG 0.7528524407416719\n",
      "NEG 0.7106807121672383\n",
      "NEG 0.7926943017502714\n",
      "NEG 0.7798442491892632\n",
      "NEG 0.7561875759883907\n",
      "NEG 0.7820507139293096\n",
      "NEG 0.8285003245008147\n",
      "NEG 0.7806897874596441\n",
      "NEG 0.7642562390949901\n",
      "NEG 0.6399699877565898\n",
      "NEG 0.7930620120452185\n",
      "NEG 0.5613584796586732\n",
      "NEG 0.7397066853991912\n",
      "NEG 0.6577906213854501\n",
      "NEG 0.5727139757438262\n",
      "NEG 0.7930539647625323\n",
      "NEG 0.863019186091236\n",
      "NEG 0.5138180384492768\n",
      "NEG 0.8055415070512573\n",
      "NEG 0.8884109469682766\n",
      "NEG 0.8759383576035591\n",
      "NEG 0.7966616650402939\n",
      "NEG 0.5537623114282407\n",
      "NEG 0.8261447856297854\n",
      "NEG 0.791414208915661\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.42021414975836024\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.03050046577754659, pvalue=0.7574223742683499)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.3097028554364059, pvalue=0.0015372548164625454)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.21593694757472504, pvalue=0.030947550410376626)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.349354404763321, pvalue=0.0001965986236374723)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.5922 | Kappa = 0.2875 | Precision = 0.6424 | Recall = 0.5492 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.4946 | Kappa = 0.2523 | Precision = 0.4182 | Recall = 0.6053 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 34 | TMV: ['eventfulness', 'coh_seq', 'setting', 'temporal_order', 'feltness', 'agenthood', 'concreteness', 'temporality', 'saying', 'agency'] | POS: ['prp', 'pos', 'rb', 'vbn', '-lrb-', 'vbp', 'vbg', 'cc', 'rp', 'nn', 'nnp', 'md', 'vb', 'wdt', 'jj', 'to', 'in', 'dt', 'cd', 'vbd', 'wp', 'vbz', 'nns', '-rrb-']\n",
      "POS Train: (12348, 24) | POS Test: (416, 24) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 34) 12348 | (416, 34)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.7611281771500013\n",
      "NEG 0.5693240460177822\n",
      "NEG 0.8708449459452834\n",
      "NEG 0.8265806215996012\n",
      "NEG 0.5589685840690344\n",
      "NEG 0.7419031232169032\n",
      "NEG 0.8159249873981236\n",
      "NEG 0.8749887217210579\n",
      "NEG 0.5597058971428814\n",
      "NEG 0.7869436636955047\n",
      "NEG 0.8846350936689032\n",
      "NEG 0.7604087349682292\n",
      "NEG 0.7875074834869621\n",
      "NEG 0.5785547812968732\n",
      "NEG 0.6841242411658895\n",
      "NEG 0.5392901138547852\n",
      "NEG 0.8936728668923402\n",
      "NEG 0.8316456885278887\n",
      "NEG 0.679914636868881\n",
      "NEG 0.8031824634833385\n",
      "NEG 0.6334718773763287\n",
      "NEG 0.5754938525862883\n",
      "NEG 0.7481360589049734\n",
      "NEG 0.7804036493468955\n",
      "NEG 0.7482908231447594\n",
      "NEG 0.8164333244383726\n",
      "NEG 0.7962318473760885\n",
      "NEG 0.7820153586084275\n",
      "NEG 0.8095509963702905\n",
      "NEG 0.8472300625484831\n",
      "NEG 0.8041217059387931\n",
      "NEG 0.7899646971933572\n",
      "NEG 0.6649416345977613\n",
      "NEG 0.8190313943697466\n",
      "NEG 0.574609100507199\n",
      "NEG 0.7547805322754271\n",
      "NEG 0.686454508243912\n",
      "NEG 0.5870486516888059\n",
      "NEG 0.8152986370106753\n",
      "NEG 0.864747619429995\n",
      "NEG 0.5327671862696259\n",
      "NEG 0.8447484852934576\n",
      "NEG 0.9040126578434323\n",
      "NEG 0.8876038334055618\n",
      "NEG 0.8398311951275185\n",
      "NEG 0.5867512798765296\n",
      "NEG 0.7991544087518393\n",
      "NEG 0.8205979297165835\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.423991444352753\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.027524672074002245, pvalue=0.780459276634827)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.3163981238473059, pvalue=0.0011964083176647586)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.2263300846575281, pvalue=0.023554845667956886)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.35158556015987097, pvalue=0.00017770956574671647)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.5938 | Kappa = 0.2921 | Precision = 0.6463 | Recall = 0.5492 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.4964 | Kappa = 0.2558 | Precision = 0.4207 | Recall = 0.6053 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 35 | TMV: ['eventfulness', 'coh_seq', 'setting', 'temporal_order', 'feltness', 'agenthood', 'concreteness', 'temporality', 'saying', 'agency'] | POS: ['prp', 'pos', 'rb', 'vbn', '-lrb-', 'wrb', 'vbp', 'vbg', 'cc', 'rp', 'nn', 'nnp', 'md', 'vb', 'wdt', 'jj', 'to', 'in', 'dt', 'cd', 'vbd', 'wp', 'vbz', 'nns', '-rrb-']\n",
      "POS Train: (12348, 25) | POS Test: (416, 25) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 35) 12348 | (416, 35)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.6868877948563409\n",
      "NEG 0.5731576382808046\n",
      "NEG 0.8588844766850086\n",
      "NEG 0.8287797096339673\n",
      "NEG 0.5731547553404226\n",
      "NEG 0.5126788486981673\n",
      "NEG 0.7137165116129232\n",
      "NEG 0.7964142282972092\n",
      "NEG 0.8537858738654907\n",
      "NEG 0.5715171362169411\n",
      "NEG 0.7934577137799859\n",
      "NEG 0.8867077201469435\n",
      "NEG 0.7781692925229434\n",
      "NEG 0.7887590479189509\n",
      "NEG 0.5634609768280835\n",
      "NEG 0.5371652951054882\n",
      "NEG 0.5408283791876756\n",
      "NEG 0.8846844623942258\n",
      "NEG 0.833026037063655\n",
      "NEG 0.6816568949779724\n",
      "NEG 0.7976333844126909\n",
      "NEG 0.5940780678954827\n",
      "NEG 0.5874869801854095\n",
      "NEG 0.7613232194151529\n",
      "NEG 0.7709608940891537\n",
      "NEG 0.7442784218000897\n",
      "NEG 0.8123058526007335\n",
      "NEG 0.774873896226435\n",
      "NEG 0.8101978374274365\n",
      "NEG 0.8013705401787896\n",
      "NEG 0.7989838278192778\n",
      "NEG 0.8429506373899069\n",
      "NEG 0.8039060507094952\n",
      "NEG 0.7957959059550613\n",
      "NEG 0.6666981830720313\n",
      "NEG 0.831367190421263\n",
      "NEG 0.5796628649010381\n",
      "NEG 0.7376659513057569\n",
      "NEG 0.6927346910118233\n",
      "NEG 0.5820886838385582\n",
      "NEG 0.8203782687251472\n",
      "NEG 0.8479516072783029\n",
      "NEG 0.5401418750250745\n",
      "NEG 0.8619643175344245\n",
      "NEG 0.886987167129805\n",
      "NEG 0.8566818248009358\n",
      "NEG 0.5942243612038586\n",
      "NEG 0.8059019754361876\n",
      "NEG 0.8267133222914252\n",
      "NEG 0.5109782967247262\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4277856493545795\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.042958529892596485, pvalue=0.6634712809576057)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.31872934289769944, pvalue=0.0010949249420958238)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.22688278327696043, pvalue=0.02320809766890612)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.351645861657075, pvalue=0.00017722321962294072)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.5938 | Kappa = 0.2921 | Precision = 0.6463 | Recall = 0.5492 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5036 | Kappa = 0.2664 | Precision = 0.4268 | Recall = 0.614 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 36 | TMV: ['eventfulness', 'coh_seq', 'setting', 'temporal_order', 'feltness', 'agenthood', 'concreteness', 'temporality', 'saying', 'agency'] | POS: ['prp', 'pos', 'rb', 'vbn', '-lrb-', 'wrb', 'vbp', 'vbg', 'cc', 'rp', 'nn', 'nnp', 'md', 'vb', 'wdt', 'jj', 'to', 'nnps', 'in', 'dt', 'cd', 'vbd', 'wp', 'vbz', 'nns', '-rrb-']\n",
      "POS Train: (12348, 26) | POS Test: (416, 26) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 36) 12348 | (416, 36)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.7100594141711859\n",
      "NEG 0.5990693133540544\n",
      "NEG 0.8847620955384341\n",
      "NEG 0.8310558254631045\n",
      "NEG 0.6100217796323986\n",
      "NEG 0.5442275468245993\n",
      "NEG 0.7082946042170737\n",
      "NEG 0.7894556234935077\n",
      "NEG 0.8562312727178913\n",
      "NEG 0.6071506671706081\n",
      "NEG 0.8229413503707355\n",
      "NEG 0.8963131656566273\n",
      "NEG 0.7750322083958547\n",
      "NEG 0.8250924579929999\n",
      "NEG 0.5705688035661605\n",
      "NEG 0.5647226489755572\n",
      "NEG 0.5594358739352893\n",
      "NEG 0.8584268814632513\n",
      "NEG 0.709253212600254\n",
      "NEG 0.8141285560624503\n",
      "NEG 0.5965157730234817\n",
      "NEG 0.6212112160487405\n",
      "NEG 0.792074581010579\n",
      "NEG 0.801735004016443\n",
      "NEG 0.5241536854697889\n",
      "NEG 0.7823104241707362\n",
      "NEG 0.8392987579996616\n",
      "NEG 0.7662815355522544\n",
      "NEG 0.8241454590138145\n",
      "NEG 0.8303580883628322\n",
      "NEG 0.838338389828984\n",
      "NEG 0.8622153564649141\n",
      "NEG 0.7976902272471116\n",
      "NEG 0.8187579521074192\n",
      "NEG 0.6688146579429518\n",
      "NEG 0.8633131819552972\n",
      "NEG 0.7034312984904453\n",
      "NEG 0.7272196915245329\n",
      "NEG 0.6176875895694064\n",
      "NEG 0.8463900866135694\n",
      "NEG 0.8563514702568329\n",
      "NEG 0.8811103908793617\n",
      "NEG 0.5699160024620237\n",
      "NEG 0.896382019163839\n",
      "NEG 0.8948863576376684\n",
      "NEG 0.6347874072896382\n",
      "NEG 0.8480619492801382\n",
      "NEG 0.863819423109183\n",
      "NEG 0.5341273793464479\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.43163232456195333\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.04551713930640131, pvalue=0.6447499598412902)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.32553266587198265, pvalue=0.0008419404535150852)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.23255995931352053, pvalue=0.019891847584589385)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.3556535919327904, pvalue=0.00014753106675351676)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.5978 | Kappa = 0.2972 | Precision = 0.6485 | Recall = 0.5544 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.509 | Kappa = 0.2735 | Precision = 0.4303 | Recall = 0.6228 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 37 | TMV: ['eventfulness', 'coh_seq', 'setting', 'temporal_order', 'feltness', 'agenthood', 'concreteness', 'temporality', 'saying', 'agency'] | POS: ['prp', 'pos', 'ex', 'rb', 'vbn', '-lrb-', 'wrb', 'vbp', 'vbg', 'cc', 'rp', 'nn', 'nnp', 'md', 'vb', 'wdt', 'jj', 'to', 'nnps', 'in', 'dt', 'cd', 'vbd', 'wp', 'vbz', 'nns', '-rrb-']\n",
      "POS Train: (12348, 27) | POS Test: (416, 27) | POS feature-columns: ['cc', 'cd', 'dt', 'ex', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 37) 12348 | (416, 37)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.6846577324514261\n",
      "NEG 0.6192675160265233\n",
      "NEG 0.825827943819772\n",
      "NEG 0.6324045240085551\n",
      "NEG 0.5778072157719395\n",
      "NEG 0.6939982923926309\n",
      "NEG 0.7855494287077857\n",
      "NEG 0.8523248703748303\n",
      "NEG 0.6337007982771884\n",
      "NEG 0.8464900594960845\n",
      "NEG 0.8384503542136117\n",
      "NEG 0.8537517869950513\n",
      "NEG 0.5774626826489706\n",
      "NEG 0.5652793223007407\n",
      "NEG 0.5720829796457656\n",
      "NEG 0.8893824233475237\n",
      "NEG 0.8783441369898551\n",
      "NEG 0.7301274983791263\n",
      "NEG 0.826312127166935\n",
      "NEG 0.5583081591965696\n",
      "NEG 0.6535959567159154\n",
      "NEG 0.818718853171041\n",
      "NEG 0.7938692434438345\n",
      "NEG 0.5542918233220533\n",
      "NEG 0.8134953758404477\n",
      "NEG 0.8566842694535227\n",
      "NEG 0.7715252615691789\n",
      "NEG 0.818755021076583\n",
      "NEG 0.851030315616664\n",
      "NEG 0.8681733486409164\n",
      "NEG 0.8669705488057936\n",
      "NEG 0.815974192173129\n",
      "NEG 0.8249745688115049\n",
      "NEG 0.6944816237147553\n",
      "NEG 0.8842005217564906\n",
      "NEG 0.718018332974926\n",
      "NEG 0.7542889030791092\n",
      "NEG 0.6357800306856574\n",
      "NEG 0.867150149103616\n",
      "NEG 0.8674514733919655\n",
      "NEG 0.8967793490590964\n",
      "NEG 0.5889710138895046\n",
      "NEG 0.66364894807103\n",
      "NEG 0.8687780398476529\n",
      "NEG 0.8833447086587625\n",
      "NEG 0.5556412542038471\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4341243764195798\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.04733427415538923, pvalue=0.6315866474519136)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.3326920188219306, pvalue=0.0006343644471421703)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.24029173239188326, pvalue=0.016035711478057716)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.35972013779830875, pvalue=0.00012217754985585497)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6094 | Kappa = 0.3126 | Precision = 0.6548 | Recall = 0.5699 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5177 | Kappa = 0.2839 | Precision = 0.4345 | Recall = 0.6404 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 38 | TMV: ['eventfulness', 'coh_seq', 'setting', 'temporal_order', 'feltness', 'agenthood', 'concreteness', 'temporality', 'saying', 'agency'] | POS: ['prp', 'pos', 'ex', 'rb', 'vbn', '-lrb-', 'wrb', 'vbp', 'jjs', 'vbg', 'cc', 'rp', 'nn', 'nnp', 'md', 'vb', 'wdt', 'jj', 'to', 'nnps', 'in', 'dt', 'cd', 'vbd', 'wp', 'vbz', 'nns', '-rrb-']\n",
      "POS Train: (12348, 28) | POS Test: (416, 28) | POS feature-columns: ['cc', 'cd', 'dt', 'ex', 'in', 'jj', 'jjs', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 38) 12348 | (416, 38)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.6903655727144233\n",
      "NEG 0.6294925787215293\n",
      "NEG 0.8053444944253764\n",
      "NEG 0.638565703529742\n",
      "NEG 0.5813095830839223\n",
      "NEG 0.7031255664586586\n",
      "NEG 0.7652602928069051\n",
      "NEG 0.8375347551607409\n",
      "NEG 0.6427265936410135\n",
      "NEG 0.8493232298739511\n",
      "NEG 0.8398958458322419\n",
      "NEG 0.8603820256452411\n",
      "NEG 0.5775017483232231\n",
      "NEG 0.5670799539061122\n",
      "NEG 0.5687050435273997\n",
      "NEG 0.8791255789973345\n",
      "NEG 0.7311546813857925\n",
      "NEG 0.8184609705969929\n",
      "NEG 0.66443157992696\n",
      "NEG 0.819920464910246\n",
      "NEG 0.7967642720839289\n",
      "NEG 0.5706776565119721\n",
      "NEG 0.8230087057907255\n",
      "NEG 0.857735084130348\n",
      "NEG 0.7597911167437528\n",
      "NEG 0.814163591258708\n",
      "NEG 0.8506190030239446\n",
      "NEG 0.8766339385883198\n",
      "NEG 0.8639826903427914\n",
      "NEG 0.803587943145962\n",
      "NEG 0.8215955378244864\n",
      "NEG 0.6959061758942006\n",
      "NEG 0.6088238569848674\n",
      "NEG 0.7570137942652162\n",
      "NEG 0.6170210733943492\n",
      "NEG 0.8594459731418462\n",
      "NEG 0.8578190883599985\n",
      "NEG 0.5868749758291912\n",
      "NEG 0.6634952977760762\n",
      "NEG 0.8753005447279603\n",
      "NEG 0.5585283626716675\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.43891179267281505\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.05154148171593847, pvalue=0.6015532426618944)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.33947107148990396, pvalue=0.00048213757625941605)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.2500983735929671, pvalue=0.012088252331130736)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.35976337089608196, pvalue=0.00012193120504645874)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6137 | Kappa = 0.3135 | Precision = 0.6512 | Recall = 0.5803 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5175 | Kappa = 0.2802 | Precision = 0.4302 | Recall = 0.6491 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 39 | TMV: ['eventfulness', 'coh_seq', 'setting', 'temporal_order', 'feltness', 'agenthood', 'concreteness', 'temporality', 'saying', 'agency'] | POS: ['prp', 'pos', 'ex', 'jjr', 'rb', 'vbn', '-lrb-', 'wrb', 'vbp', 'jjs', 'vbg', 'cc', 'rp', 'nn', 'nnp', 'md', 'vb', 'wdt', 'jj', 'to', 'nnps', 'in', 'dt', 'cd', 'vbd', 'wp', 'vbz', 'nns', '-rrb-']\n",
      "POS Train: (12348, 29) | POS Test: (416, 29) | POS feature-columns: ['cc', 'cd', 'dt', 'ex', 'in', 'jj', 'jjr', 'jjs', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 39) 12348 | (416, 39)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG 0.7062712253725658\n",
      "NEG 0.635814333594747\n",
      "NEG 0.8097571190216493\n",
      "NEG 0.6538386280733044\n",
      "NEG 0.6021996916264144\n",
      "NEG 0.6998949642969213\n",
      "NEG 0.7592874870305243\n",
      "NEG 0.8308436858669275\n",
      "NEG 0.6593268100549672\n",
      "NEG 0.861707325934626\n",
      "NEG 0.8453679872661104\n",
      "NEG 0.8757686829783256\n",
      "NEG 0.5791527486919746\n",
      "NEG 0.597099270560159\n",
      "NEG 0.5771282727091352\n",
      "NEG 0.8593740727448488\n",
      "NEG 0.7457315562752279\n",
      "NEG 0.8094875876891073\n",
      "NEG 0.6879678257357434\n",
      "NEG 0.8320944217700054\n",
      "NEG 0.8090800335928019\n",
      "NEG 0.5940373757651879\n",
      "NEG 0.8420361437552293\n",
      "NEG 0.870954629378301\n",
      "NEG 0.7639151361804397\n",
      "NEG 0.7970021353000258\n",
      "NEG 0.8608872305629972\n",
      "NEG 0.8691510982246552\n",
      "NEG 0.8131017831657144\n",
      "NEG 0.8292596917833709\n",
      "NEG 0.6970532054662567\n",
      "NEG 0.6187162880769302\n",
      "NEG 0.7742055895201718\n",
      "NEG 0.6200527644171829\n",
      "NEG 0.8713281883053724\n",
      "NEG 0.8552873605129955\n",
      "NEG 0.607591856893257\n",
      "NEG 0.5164692696021531\n",
      "NEG 0.5129691743100881\n",
      "NEG 0.684601018888469\n",
      "NEG 0.6000350610414582\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.4436031051108554\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.05668057728601064, pvalue=0.5657562829817294)\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.3509596687728985, pvalue=0.00029851585378726437)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.25256668671242516, pvalue=0.011239722175988153)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.3631387767522331, pvalue=0.00010406200396970765)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6213 | Kappa = 0.3238 | Precision = 0.6552 | Recall = 0.5907 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5208 | Kappa = 0.2836 | Precision = 0.431 | Recall = 0.6579 | Total Items = 416\n"
     ]
    }
   ],
   "source": [
    "## Run for SVM\n",
    "\n",
    "algo = SVC(probability=True) # the best pos-TMV parameters\n",
    "with open('/Users/sunyambagga/Desktop/txtLAB-2/detecting-narrativity/results/topN_reader_annotated_svm_part2.tsv', 'w') as F:\n",
    "    F.write(\"Top-N\\tSpearman-Correlation\\tF1-2.5\\tKappa-2.5\\tF1-3\\tKappa-3\\n\")\n",
    "    for n in range(10,40):\n",
    "        print(\"\\n###########################################\\n\")\n",
    "        corr, f12, kap2, f13, kap3 = process(algo, n)\n",
    "        F.write(str(n)+'\\t'+corr+'\\t'+f12+'\\t'+kap2+'\\t'+f13+'\\t'+kap3+'\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 1 | TMV: ['agenthood'] | POS: []\n",
      "Train files: 12348 | Annotated files: 416 (12348, 1) 12348 | (416, 1)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.5362947671231553\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5835058269478709, pvalue=1.2255848961440741e-10)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5406030411480236, pvalue=1.2962881361711436e-09)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.0221433345490806, pvalue=0.8225915528442072)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.24752592414847577, pvalue=0.01303154838865893)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.6857 | Kappa = 0.3695 | Precision = 0.6344 | Recall = 0.7461 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5455 | Kappa = 0.2844 | Precision = 0.4097 | Recall = 0.8158 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 2 | TMV: ['agenthood'] | POS: ['vbd']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 1) | POS Test: (416, 1) | POS feature-columns: ['vbd']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 2) 12348 | (416, 2)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.7192082456355589\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.6702556334781168, pvalue=1.3108578815542978e-14)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.7153773927246286, pvalue=2.3422367662088894e-18)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5342195876103274, pvalue=4.3800162833617e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5360969858594715, pvalue=9.020680810971483e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7797 | Kappa = 0.5809 | Precision = 0.7624 | Recall = 0.7979 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.6646 | Kappa = 0.4837 | Precision = 0.5198 | Recall = 0.9211 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 3 | TMV: ['agenthood'] | POS: ['vbd', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 2) | POS Test: (416, 2) | POS feature-columns: ['nn', 'vbd']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 3) 12348 | (416, 3)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.5863133110543578\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5165073795264059, pvalue=2.7446130194779397e-08)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.479741172486354, pvalue=1.304538631476834e-07)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5326377520273834, pvalue=4.9595562094121815e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.503048957307364, pvalue=9.608494993576905e-08)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7124 | Kappa = 0.3859 | Precision = 0.6216 | Recall = 0.8342 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5523 | Kappa = 0.2772 | Precision = 0.3977 | Recall = 0.9035 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 4 | TMV: ['agenthood'] | POS: ['vbd', 'nn', 'vbz']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 3) | POS Test: (416, 3) | POS feature-columns: ['nn', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 4) 12348 | (416, 4)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.5825729368806464\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5000864803369769, pvalue=8.717868151590785e-08)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.46942396285000443, pvalue=2.6176843335718784e-07)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5333327224146441, pvalue=4.696419487747572e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5079787494215403, pvalue=6.858628758061079e-08)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7064 | Kappa = 0.3719 | Precision = 0.6154 | Recall = 0.829 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5561 | Kappa = 0.2829 | Precision = 0.4 | Recall = 0.9123 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 5 | TMV: ['agenthood', 'concreteness'] | POS: ['vbd', 'nn', 'vbz']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 3) | POS Test: (416, 3) | POS feature-columns: ['nn', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 5) 12348 | (416, 5)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6722235351384643\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.6917071875438185, pvalue=8.333664386797034e-16)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6116102546804509, pvalue=1.6127293669962993e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5569772746200271, pvalue=6.813082002523328e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5802683563187001, pvalue=2.4936632185075126e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7517 | Kappa = 0.4747 | Precision = 0.6614 | Recall = 0.8705 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.587 | Kappa = 0.3356 | Precision = 0.4252 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 6 | TMV: ['agenthood', 'concreteness'] | POS: ['vbd', 'nn', 'vbz', '-rrb-']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 4) | POS Test: (416, 4) | POS feature-columns: ['nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 6) 12348 | (416, 6)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6637033049183337\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.6616905386906088, pvalue=3.6991838977551656e-14)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5879813449298786, pvalue=1.7902261805795542e-11)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5539847528778674, pvalue=8.771892305823897e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5790914510149503, pvalue=2.7631387979142777e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.738 | Kappa = 0.4343 | Precision = 0.6377 | Recall = 0.8756 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5699 | Kappa = 0.3027 | Precision = 0.4075 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 7 | TMV: ['agenthood', 'concreteness'] | POS: ['vbz', '-rrb-', '-lrb-', 'vbd', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 5) | POS Test: (416, 5) | POS feature-columns: ['lrb', 'nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 7) 12348 | (416, 7)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6629693140915295\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.6597154613056596, pvalue=4.676570353311358e-14)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.587220618349766, pvalue=1.9282747541096766e-11)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5544359649949696, pvalue=8.445291814171582e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5790914510149503, pvalue=2.7631387979142777e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.738 | Kappa = 0.4343 | Precision = 0.6377 | Recall = 0.8756 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5699 | Kappa = 0.3027 | Precision = 0.4075 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 8 | TMV: ['agenthood', 'concreteness'] | POS: ['vbz', '-rrb-', 'jj', '-lrb-', 'vbd', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 6) | POS Test: (416, 6) | POS feature-columns: ['jj', 'lrb', 'nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 8) 12348 | (416, 8)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6665663110548752\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.6472085242834889, pvalue=1.9834489176004368e-13)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.5911262999379053, pvalue=1.3141423883129854e-11)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5727593260262525, pvalue=1.7224211075297003e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5837810583732593, pvalue=1.831325208806366e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.738 | Kappa = 0.4343 | Precision = 0.6377 | Recall = 0.8756 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5699 | Kappa = 0.3027 | Precision = 0.4075 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 9 | TMV: ['agenthood', 'concreteness'] | POS: ['vbz', '-rrb-', 'jj', '-lrb-', 'vbd', 'in', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 7) | POS Test: (416, 7) | POS feature-columns: ['in', 'jj', 'lrb', 'nn', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 9) 12348 | (416, 9)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6723189214278957\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.6078541170200612, pvalue=1.2438844876505444e-11)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.587290197000386, pvalue=1.915233333805756e-11)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5762445506549028, pvalue=1.2587856425422456e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5809528828729219, pvalue=2.348744467506893e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7301 | Kappa = 0.4237 | Precision = 0.6371 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5845 | Kappa = 0.3291 | Precision = 0.4208 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 10 | TMV: ['agenthood', 'concreteness'] | POS: ['prp', 'vbz', '-rrb-', 'jj', '-lrb-', 'vbd', 'in', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 8) | POS Test: (416, 8) | POS feature-columns: ['in', 'jj', 'lrb', 'nn', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 10) 12348 | (416, 10)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6815488790043547\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.6027664534983728, pvalue=2.038649013109733e-11)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6171858238834718, pvalue=8.87314657206827e-13)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5465993959266796, pvalue=1.6194154129242774e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5690096958874206, pvalue=6.546578714516884e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7373 | Kappa = 0.438 | Precision = 0.6423 | Recall = 0.8653 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5829 | Kappa = 0.3261 | Precision = 0.4192 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 11 | TMV: ['eventfulness', 'concreteness', 'agenthood'] | POS: ['prp', 'vbz', '-rrb-', 'jj', '-lrb-', 'vbd', 'in', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 8) | POS Test: (416, 8) | POS feature-columns: ['in', 'jj', 'lrb', 'nn', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 11) 12348 | (416, 11)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6823297291796384\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.6080804869209151, pvalue=1.2165911455338663e-11)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.616281301425411, pvalue=9.784114346671371e-13)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5459511026549813, pvalue=1.707748719758399e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5693399499267381, pvalue=6.367113112056527e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7401 | Kappa = 0.4429 | Precision = 0.6437 | Recall = 0.8705 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5813 | Kappa = 0.3231 | Precision = 0.4176 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 12 | TMV: ['eventfulness', 'concreteness', 'agenthood'] | POS: ['prp', 'vbz', '-rrb-', 'jj', '-lrb-', 'vbd', 'in', 'dt', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 9) | POS Test: (416, 9) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 12) 12348 | (416, 12)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6803251996479788\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.6030098011417906, pvalue=1.9914341790545744e-11)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6110443483220745, pvalue=1.7124373978868488e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5482486540098802, pvalue=1.4140256787852068e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5677126982057371, pvalue=7.299533663942653e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7373 | Kappa = 0.438 | Precision = 0.6423 | Recall = 0.8653 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5829 | Kappa = 0.3261 | Precision = 0.4192 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 13 | TMV: ['eventfulness', 'concreteness', 'agenthood'] | POS: ['prp', 'vbz', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd', 'in', 'dt', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 13) 12348 | (416, 13)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.678449658394073\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5996708451041975, pvalue=2.741895223644649e-11)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6154602733480943, pvalue=1.0688828255608864e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5482123495866652, pvalue=1.4182647115088723e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5618041532114016, pvalue=1.1915648491764686e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7325 | Kappa = 0.4244 | Precision = 0.635 | Recall = 0.8653 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5782 | Kappa = 0.3172 | Precision = 0.4144 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 14 | TMV: ['eventfulness', 'concreteness', 'agenthood', 'setting'] | POS: ['prp', 'vbz', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd', 'in', 'dt', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 14) 12348 | (416, 14)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.679029563676411\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5828628799658051, pvalue=1.298595001232383e-10)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6194865579306418, pvalue=6.910393480762722e-13)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5548456863426828, pvalue=8.158870042172693e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5659653541068025, pvalue=8.446369452651273e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7325 | Kappa = 0.4244 | Precision = 0.635 | Recall = 0.8653 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5782 | Kappa = 0.3172 | Precision = 0.4144 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 15 | TMV: ['agency', 'agenthood', 'eventfulness', 'concreteness', 'setting'] | POS: ['prp', 'vbz', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd', 'in', 'dt', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 15) 12348 | (416, 15)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6844626633316861\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5918837205148278, pvalue=5.699131151819016e-11)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6400261755936834, pvalue=6.752732014899845e-14)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5557377378845398, pvalue=7.567195589236481e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5689976866496271, pvalue=6.553195414635898e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7313 | Kappa = 0.424 | Precision = 0.636 | Recall = 0.8601 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5813 | Kappa = 0.3231 | Precision = 0.4176 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 16 | TMV: ['agency', 'agenthood', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['prp', 'vbz', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd', 'in', 'dt', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 10) | POS Test: (416, 10) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 16) 12348 | (416, 16)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.685027393522524\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5959923342153237, pvalue=3.883431330671459e-11)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6395159321558029, pvalue=7.169552772455889e-14)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5596326838609036, pvalue=5.43304118004357e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5675805965900101, pvalue=7.380729122569071e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7297 | Kappa = 0.4195 | Precision = 0.6336 | Recall = 0.8601 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5798 | Kappa = 0.3201 | Precision = 0.416 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 17 | TMV: ['agency', 'agenthood', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbp', 'prp', 'vbz', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd', 'in', 'dt', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 11) | POS Test: (416, 11) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 17) 12348 | (416, 17)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6849036581610588\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5944643373845608, pvalue=4.481736860040864e-11)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.637224475262049, pvalue=9.369474809330495e-14)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5596845473226395, pvalue=5.408968273713124e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5656651231619684, pvalue=8.660084484530371e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7329 | Kappa = 0.4286 | Precision = 0.6385 | Recall = 0.8601 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5829 | Kappa = 0.3261 | Precision = 0.4192 | Recall = 0.9561 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 18 | TMV: ['agency', 'agenthood', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbp', 'prp', 'vbz', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd', 'in', 'dt', 'vbn', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 12) | POS Test: (416, 12) | POS feature-columns: ['dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 18) 12348 | (416, 18)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6800748106382215\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5773960468601866, pvalue=2.113276214190694e-10)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.630215585856255, pvalue=2.0953197959039526e-13)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.555333202883, pvalue=7.830215852183407e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5615399499799476, pvalue=1.2176902453429277e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7285 | Kappa = 0.4191 | Precision = 0.6346 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5775 | Kappa = 0.3175 | Precision = 0.4154 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 19 | TMV: ['agency', 'agenthood', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbp', 'prp', 'vbz', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd', 'cc', 'dt', 'in', 'vbn', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 13) | POS Test: (416, 13) | POS feature-columns: ['cc', 'dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 19) 12348 | (416, 19)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.679791903575249\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.578437348404114, pvalue=1.9274115240492674e-10)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6335599996627259, pvalue=1.4307773571292532e-13)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5598453240540208, pvalue=5.334992095851592e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5645902963794622, pvalue=9.468553852833848e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7269 | Kappa = 0.4146 | Precision = 0.6322 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.576 | Kappa = 0.3145 | Precision = 0.4138 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 20 | TMV: ['feltness', 'agency', 'agenthood', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbp', 'prp', 'vbz', '-rrb-', 'nns', 'jj', '-lrb-', 'vbd', 'cc', 'dt', 'in', 'vbn', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 13) | POS Test: (416, 13) | POS feature-columns: ['cc', 'dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 20) 12348 | (416, 20)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6804693630172329\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.577928016127193, pvalue=2.0162682786919944e-10)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6374424883673252, pvalue=9.13480770294229e-14)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.560218740978519, pvalue=5.166910178562598e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.566619857566541, pvalue=7.997880817353311e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7301 | Kappa = 0.4237 | Precision = 0.6371 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5791 | Kappa = 0.3205 | Precision = 0.417 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 21 | TMV: ['feltness', 'agency', 'agenthood', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbp', 'prp', 'vbz', '-rrb-', 'cd', 'nns', 'jj', '-lrb-', 'vbd', 'cc', 'in', 'dt', 'vbn', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 14) | POS Test: (416, 14) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 21) 12348 | (416, 21)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6816795348745811\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5788108587405226, pvalue=1.8646567020410813e-10)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.638254239291226, pvalue=8.310091452664919e-14)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5633253623364975, pvalue=3.9527174135541817e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5653468783604443, pvalue=8.892286855580013e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7285 | Kappa = 0.4191 | Precision = 0.6346 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5775 | Kappa = 0.3175 | Precision = 0.4154 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 22 | TMV: ['feltness', 'agency', 'agenthood', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbp', 'prp', 'vbz', '-rrb-', 'cd', 'nns', 'jj', '-lrb-', 'vbd', 'cc', 'nnp', 'in', 'dt', 'vbn', 'nn']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POS Train: (12348, 15) | POS Test: (416, 15) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 22) 12348 | (416, 22)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6758140284911671\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.570333305953549, pvalue=3.9126573549832846e-10)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6393303890874829, pvalue=7.327218279001765e-14)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5510077901742285, pvalue=1.1251271759361446e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5629450308017713, pvalue=1.08479855516656e-09)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7237 | Kappa = 0.4056 | Precision = 0.6274 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5729 | Kappa = 0.3086 | Precision = 0.4106 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 23 | TMV: ['feltness', 'agency', 'agenthood', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbp', 'prp', 'vbz', '-rrb-', 'cd', 'nns', 'jj', '-lrb-', 'vbd', 'cc', 'nnp', 'in', 'dt', 'vbn', 'nn']\n",
      "POS Train: (12348, 15) | POS Test: (416, 15) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 23) 12348 | (416, 23)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6725846522884007\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5574019253672774, pvalue=1.1646827787921872e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6425031755557575, pvalue=5.04087713785508e-14)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5453650455373659, pvalue=1.7915675011674952e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.574389834418848, pvalue=4.1464968099769964e-10)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7245 | Kappa = 0.4019 | Precision = 0.6231 | Recall = 0.8653 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5654 | Kappa = 0.294 | Precision = 0.403 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 24 | TMV: ['feltness', 'agency', 'agenthood', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbp', 'prp', 'vbn', 'vbz', '-rrb-', 'cd', 'nns', 'jj', '-lrb-', 'vbd', 'cc', 'nnp', 'in', 'dt', 'wdt', 'nn']\n",
      "POS Train: (12348, 16) | POS Test: (416, 16) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 24) 12348 | (416, 24)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6758452958500278\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5458457419286926, pvalue=2.9698965209787628e-09)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\t SCIENCE -- SpearmanrResult(correlation=0.610445971926742, pvalue=1.8243430609974862e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5565727396184873, pvalue=7.050880548324212e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.6058840605319473, pvalue=2.405499390324594e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7293 | Kappa = 0.4274 | Precision = 0.6417 | Recall = 0.8446 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5815 | Kappa = 0.3269 | Precision = 0.4213 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 25 | TMV: ['feltness', 'agency', 'agenthood', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbp', 'prp', 'vbn', 'vbz', 'to', '-rrb-', 'cd', 'nns', 'jj', '-lrb-', 'vbd', 'cc', 'nnp', 'in', 'dt', 'wdt', 'nn']\n",
      "POS Train: (12348, 17) | POS Test: (416, 17) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 25) 12348 | (416, 25)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6772056344095315\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5564851272688197, pvalue=1.2561118999358446e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6191711347144976, pvalue=7.152214387866483e-13)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5461844882327926, pvalue=1.675428772989812e-09)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.6061722822389881, pvalue=2.3402368712526308e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7257 | Kappa = 0.4142 | Precision = 0.6332 | Recall = 0.8497 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5737 | Kappa = 0.3118 | Precision = 0.4131 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 26 | TMV: ['feltness', 'agency', 'agenthood', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbp', 'prp', 'vbn', 'vbz', 'to', '-rrb-', 'cd', 'nns', 'jj', '-lrb-', 'vbd', 'cc', 'md', 'in', 'nnp', 'dt', 'wdt', 'nn']\n",
      "POS Train: (12348, 18) | POS Test: (416, 18) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 26) 12348 | (416, 26)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6800480457790367\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.558873329722827, pvalue=1.0311549014783781e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6318344491273484, pvalue=1.7430268851619503e-13)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5527452161423801, pvalue=9.73276062422455e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.615005076636008, pvalue=9.937643210250874e-12)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7341 | Kappa = 0.429 | Precision = 0.6374 | Recall = 0.8653 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5745 | Kappa = 0.3115 | Precision = 0.4122 | Recall = 0.9474 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 27 | TMV: ['feltness', 'agency', 'agenthood', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbz', 'cd', 'vbd', 'cc', 'in', 'dt', 'to', 'wdt', 'nns', 'rp', 'vbn', 'nn', 'prp', '-rrb-', 'md', 'nnp', 'vbp', 'jj', '-lrb-']\n",
      "POS Train: (12348, 19) | POS Test: (416, 19) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 27) 12348 | (416, 27)\n",
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6762663629493533\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5448666921075, pvalue=3.209806431800213e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6188881815353092, pvalue=7.376092123252372e-13)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5710841362121837, pvalue=2.0000148877929268e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.609991219857278, pvalue=1.6212264183307508e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7273 | Kappa = 0.4187 | Precision = 0.6357 | Recall = 0.8497 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5753 | Kappa = 0.3148 | Precision = 0.4147 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 28 | TMV: ['feltness', 'agency', 'agenthood', 'temporal_order', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbz', 'cd', 'vbd', 'cc', 'in', 'dt', 'to', 'wdt', 'nns', 'rp', 'vbn', 'nn', 'prp', '-rrb-', 'md', 'nnp', 'vbp', 'jj', '-lrb-']\n",
      "POS Train: (12348, 19) | POS Test: (416, 19) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 28) 12348 | (416, 28)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6742799267960211\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5444818632760486, pvalue=3.3091026216198564e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6112159756602705, pvalue=1.6815848733510045e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5786302698947527, pvalue=1.0134495178885001e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5996332522605009, pvalue=4.3386203753005295e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7273 | Kappa = 0.4187 | Precision = 0.6357 | Recall = 0.8497 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5753 | Kappa = 0.3148 | Precision = 0.4147 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 29 | TMV: ['feltness', 'agency', 'agenthood', 'temporal_order', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbz', 'cd', 'vbd', 'cc', 'in', 'dt', 'to', 'wdt', 'nns', 'rp', 'vbn', 'nn', 'prp', 'vb', '-rrb-', 'md', 'nnp', 'vbp', 'jj', '-lrb-']\n",
      "POS Train: (12348, 20) | POS Test: (416, 20) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 29) 12348 | (416, 29)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6735717836525418\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5437631388408378, pvalue=3.502492399298063e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.608293672334228, pvalue=2.288317137292469e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5786665743179679, pvalue=1.0100980771717112e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5963126980106356, pvalue=5.904538852632336e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7301 | Kappa = 0.4237 | Precision = 0.6371 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5737 | Kappa = 0.3118 | Precision = 0.4131 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 30 | TMV: ['saying', 'feltness', 'agency', 'agenthood', 'temporal_order', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbz', 'cd', 'vbd', 'cc', 'in', 'dt', 'to', 'wdt', 'nns', 'rp', 'vbn', 'nn', 'prp', 'vb', '-rrb-', 'md', 'nnp', 'vbp', 'jj', '-lrb-']\n",
      "POS Train: (12348, 20) | POS Test: (416, 20) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 30) 12348 | (416, 30)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6734964084727815\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5442894488603229, pvalue=3.3598483938223414e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6080942135357841, pvalue=2.3366764248642786e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5788584691263907, pvalue=9.925598669701754e-11)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.596096531730355, pvalue=6.02345351701807e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7301 | Kappa = 0.4237 | Precision = 0.6371 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5737 | Kappa = 0.3118 | Precision = 0.4131 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 31 | TMV: ['saying', 'feltness', 'agency', 'agenthood', 'temporal_order', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['vbz', 'cd', 'vbd', 'cc', 'in', 'dt', 'to', 'wdt', 'nns', 'rp', 'vbn', 'nn', 'prp', 'vb', '-rrb-', 'md', 'nnp', 'vbp', 'rb', 'jj', '-lrb-']\n",
      "POS Train: (12348, 21) | POS Test: (416, 21) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 31) 12348 | (416, 31)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6733962695448032\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5431349623659688, pvalue=3.6803493471179123e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6098800655683655, pvalue=1.936660307672323e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5785317293174547, pvalue=1.0226003516018845e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5958803654500745, pvalue=6.14467199492123e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7301 | Kappa = 0.4237 | Precision = 0.6371 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5737 | Kappa = 0.3118 | Precision = 0.4131 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 32 | TMV: ['saying', 'feltness', 'agency', 'agenthood', 'temporal_order', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['pos', 'vbz', 'cd', 'vbd', 'cc', 'in', 'dt', 'to', 'wdt', 'nns', 'rp', 'vbn', 'nn', 'prp', 'vb', '-rrb-', 'md', 'nnp', 'vbp', 'rb', 'jj', '-lrb-']\n",
      "POS Train: (12348, 22) | POS Test: (416, 22) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 32) 12348 | (416, 32)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6745603324702848\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.546066452582025, pvalue=2.9182385013461624e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6101212715571818, pvalue=1.8879930743717516e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5794860170133946, pvalue=9.372292667984775e-11)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5968471090924402, pvalue=5.620175969632546e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7301 | Kappa = 0.4237 | Precision = 0.6371 | Recall = 0.8549 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5737 | Kappa = 0.3118 | Precision = 0.4131 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 33 | TMV: ['saying', 'feltness', 'agency', 'agenthood', 'temporal_order', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['pos', 'vbz', 'cd', 'vbg', 'vbd', 'cc', 'in', 'dt', 'to', 'wdt', 'nns', 'rp', 'vbn', 'nn', 'prp', 'vb', '-rrb-', 'md', 'nnp', 'vbp', 'rb', 'jj', '-lrb-']\n",
      "POS Train: (12348, 23) | POS Test: (416, 23) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 33) 12348 | (416, 33)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6740851519952242\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5419917943666572, pvalue=4.026460870286268e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6070737266600231, pvalue=2.5999649496886996e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5840655606846719, pvalue=6.143942003463116e-11)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.5975676633600423, pvalue=5.257567156666124e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7289 | Kappa = 0.4233 | Precision = 0.6381 | Recall = 0.8497 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5768 | Kappa = 0.3178 | Precision = 0.4163 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 34 | TMV: ['saying', 'feltness', 'agency', 'agenthood', 'temporal_order', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['pos', 'vbz', 'cd', 'vbg', 'vbd', 'cc', 'in', 'dt', 'to', 'wdt', 'nns', 'rp', 'vbn', 'nn', 'prp', 'vb', '-rrb-', 'wp', 'md', 'nnp', 'vbp', 'rb', 'jj', '-lrb-']\n",
      "POS Train: (12348, 24) | POS Test: (416, 24) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 34) 12348 | (416, 34)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6760298149571183\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5473793980069769, pvalue=2.628295469660427e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6118468220925591, pvalue=1.5727331225609694e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.579568998552172, pvalue=9.301390663131059e-11)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.6036503423023815, pvalue=2.974249147432924e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7248 | Kappa = 0.4179 | Precision = 0.6378 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5815 | Kappa = 0.3269 | Precision = 0.4213 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 35 | TMV: ['saying', 'feltness', 'agency', 'agenthood', 'temporal_order', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['pos', 'vbz', 'cd', 'vbg', 'vbd', 'cc', 'in', 'dt', 'to', 'wdt', 'nns', 'rp', 'vbn', 'nn', 'prp', 'vb', '-rrb-', 'wp', 'md', 'nnp', 'wrb', 'vbp', 'rb', 'jj', '-lrb-']\n",
      "POS Train: (12348, 25) | POS Test: (416, 25) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 35) 12348 | (416, 35)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6755419607792663\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5447478479095519, pvalue=3.2401624779186337e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6121019438114994, pvalue=1.5306750153379667e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5827430424104072, pvalue=6.945495137979702e-11)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.6035782868756213, pvalue=2.9945994671607234e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7265 | Kappa = 0.4225 | Precision = 0.6403 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5831 | Kappa = 0.3299 | Precision = 0.4229 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 36 | TMV: ['saying', 'feltness', 'agency', 'agenthood', 'temporal_order', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['pos', 'vbz', 'cd', 'vbg', 'vbd', 'cc', 'in', 'dt', 'to', 'wdt', 'nns', 'rp', 'vbn', 'nn', 'prp', 'vb', 'nnps', '-rrb-', 'wp', 'md', 'nnp', 'wrb', 'vbp', 'rb', 'jj', '-lrb-']\n",
      "POS Train: (12348, 26) | POS Test: (416, 26) | POS feature-columns: ['cc', 'cd', 'dt', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 36) 12348 | (416, 36)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6748947681406597\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5464569406609978, pvalue=2.8289468065622195e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6117818820186472, pvalue=1.5836159489323376e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5795534395136513, pvalue=9.314645320868009e-11)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.6024314046663549, pvalue=3.337142580726146e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7281 | Kappa = 0.427 | Precision = 0.6429 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5847 | Kappa = 0.333 | Precision = 0.4246 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 37 | TMV: ['saying', 'feltness', 'agency', 'agenthood', 'temporal_order', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['pos', 'vbz', 'cd', 'vbg', 'vbd', 'cc', 'in', 'dt', 'to', 'wdt', 'nns', 'rp', 'vbn', 'nn', 'prp', 'vb', 'nnps', '-rrb-', 'ex', 'wp', 'md', 'nnp', 'wrb', 'vbp', 'rb', 'jj', '-lrb-']\n",
      "POS Train: (12348, 27) | POS Test: (416, 27) | POS feature-columns: ['cc', 'cd', 'dt', 'ex', 'in', 'jj', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 37) 12348 | (416, 37)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.673851855808311\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5455910757902321, pvalue=3.0305914499348695e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6133079737555805, pvalue=1.3461395520293712e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5694763688983718, pvalue=2.3066063531680931e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.6075773630608118, pvalue=2.0457553669698328e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7265 | Kappa = 0.4225 | Precision = 0.6403 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5831 | Kappa = 0.3299 | Precision = 0.4229 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 38 | TMV: ['saying', 'feltness', 'agency', 'agenthood', 'temporal_order', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['pos', 'vbz', 'cd', 'vbg', 'vbd', 'cc', 'in', 'dt', 'to', 'wdt', 'nns', 'rp', 'vbn', 'nn', 'prp', 'vb', 'nnps', '-rrb-', 'ex', 'wp', 'md', 'nnp', 'wrb', 'vbp', 'rb', 'jj', '-lrb-', 'jjs']\n",
      "POS Train: (12348, 28) | POS Test: (416, 28) | POS feature-columns: ['cc', 'cd', 'dt', 'ex', 'in', 'jj', 'jjs', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 38) 12348 | (416, 38)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.6741589429621359\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.5504636879061094, pvalue=2.0518233957294848e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6118932078596392, pvalue=1.5650039853088248e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.5684650313945223, pvalue=2.522118148381351e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.6074812891584649, pvalue=2.0646973556671076e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7265 | Kappa = 0.4225 | Precision = 0.6403 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5831 | Kappa = 0.3299 | Precision = 0.4229 | Recall = 0.9386 | Total Items = 416\n",
      "\n",
      "###########################################\n",
      "\n",
      "Total Features: 39 | TMV: ['saying', 'feltness', 'agency', 'agenthood', 'temporal_order', 'coh_seq', 'eventfulness', 'temporality', 'concreteness', 'setting'] | POS: ['pos', 'vbz', 'cd', 'vbg', 'vbd', 'cc', 'in', 'dt', 'to', 'wdt', 'jjr', 'nns', 'rp', 'vbn', 'nn', 'prp', 'vb', 'nnps', '-rrb-', 'ex', 'wp', 'md', 'nnp', 'wrb', 'vbp', 'rb', 'jj', '-lrb-', 'jjs']\n",
      "POS Train: (12348, 29) | POS Test: (416, 29) | POS feature-columns: ['cc', 'cd', 'dt', 'ex', 'in', 'jj', 'jjr', 'jjs', 'lrb', 'md', 'nn', 'nnp', 'nnps', 'nns', 'pos', 'prp', 'rb', 'rp', 'rrb', 'to', 'vb', 'vbd', 'vbg', 'vbn', 'vbp', 'vbz', 'wdt', 'wp', 'wrb']\n",
      "Train files: 12348 | Annotated files: 416 (12348, 39) 12348 | (416, 39)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "Correlation - All Data: 0.674058470515663\n",
      "\t 19CNONFIC -- SpearmanrResult(correlation=0.550916427707817, pvalue=1.978167957259833e-09)\n",
      "\t SCIENCE -- SpearmanrResult(correlation=0.6117030262146111, pvalue=1.5969287204619135e-12)\n",
      "\t NOVEL19C -- SpearmanrResult(correlation=0.568356118124877, pvalue=2.546450270001521e-10)\n",
      "\t POETRY -- SpearmanrResult(correlation=0.6069528826955568, pvalue=2.1719373858308486e-11)\n",
      "\n",
      "Threshold = 2.5\n",
      "F1 = 0.7265 | Kappa = 0.4225 | Precision = 0.6403 | Recall = 0.8394 | Total Items = 416\n",
      "\n",
      "Threshold = 3\n",
      "F1 = 0.5831 | Kappa = 0.3299 | Precision = 0.4229 | Recall = 0.9386 | Total Items = 416\n"
     ]
    }
   ],
   "source": [
    "## Run for LogReg\n",
    "# Sweep over the top-N feature sets (N = 1..39): call process(algo, n) for each N and\n",
    "# append one tab-separated row per N to the results file opened below.\n",
    "# NOTE(review): `process` is not defined in this cell; the string concatenation in the\n",
    "# final write assumes it returns five already-formatted strings -- confirm.\n",
    "\n",
    "# The solver is named explicitly: 'liblinear' is the default in scikit-learn < 0.22, so\n",
    "# this keeps the fitted model unchanged, silences the FutureWarning emitted on every\n",
    "# fit, and prevents results from shifting when the library default becomes 'lbfgs'.\n",
    "algo = LogisticRegression(solver='liblinear') # the best pos-TMV parameters\n",
    "with open('/Users/sunyambagga/Desktop/txtLAB-2/detecting-narrativity/results/topN_reader_annotated_logreg.tsv', 'w') as F:\n",
    "    # Header row; the columns match the order of the values written in the loop.\n",
    "    F.write(\"Top-N\\tSpearman-Correlation\\tF1-2.5\\tKappa-2.5\\tF1-3\\tKappa-3\\n\")\n",
    "    for n in range(1,40):\n",
    "        print(\"\\n###########################################\\n\")\n",
    "        corr, f12, kap2, f13, kap3 = process(algo, n)\n",
    "        F.write(str(n)+'\\t'+corr+'\\t'+f12+'\\t'+kap2+'\\t'+f13+'\\t'+kap3+'\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
